diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000000000000000000000000000000000000..8ab974a17e573e96aaf5232f411e5cbd117da46b --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + 1694488935255 + + + + + + + + + \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000000000000000000000000000000000..97cfb6699eaa6a693549f4ff66300fbf793e356a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,8 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Chase" + given-names: "Harrison" +title: "LangChain" +date-released: 2022-10-17 +url: "https://github.com/hwchase17/langchain" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..50364a1f8cdbf4cd3279644787b6c713b97ba11e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.9 +RUN useradd -m -u 1000 user +USER user +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH +WORKDIR $HOME/app +COPY --chown=user . $HOME/app +COPY ./requirements.txt ~/app/requirements.txt +RUN pip install -r requirements.txt +COPY . . +CMD ["python", "app.py"] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d5c9d8189aa990e261a8fe9af0120d16018b6abf --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) Harrison Chase + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..28dd38702e932574152d917022ea696c3577b62e --- /dev/null +++ b/Makefile @@ -0,0 +1,53 @@ +.PHONY: all clean format lint test tests test_watch integration_tests help + +all: help + +coverage: + poetry run pytest --cov \ + --cov-config=.coveragerc \ + --cov-report xml \ + --cov-report term-missing:skip-covered + +clean: docs_clean + +docs_build: + cd docs && poetry run make html + +docs_clean: + cd docs && poetry run make clean + +docs_linkcheck: + poetry run linkchecker docs/_build/html/index.html + +format: + poetry run black . + poetry run ruff --select I --fix . + +lint: + poetry run mypy . + poetry run black . --check + poetry run ruff . + +test: + poetry run pytest tests/unit_tests + +tests: + poetry run pytest tests/unit_tests + +test_watch: + poetry run ptw --now . 
-- tests/unit_tests + +integration_tests: + poetry run pytest tests/integration_tests + +help: + @echo '----' + @echo 'coverage - run unit tests and generate coverage report' + @echo 'docs_build - build the documentation' + @echo 'docs_clean - clean the documentation build artifacts' + @echo 'docs_linkcheck - run linkchecker on the documentation' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'test_watch - run unit tests in watch mode' + @echo 'integration_tests - run integration tests' diff --git a/Procfile b/Procfile new file mode 100644 index 0000000000000000000000000000000000000000..ca6e941cbc79a8519bfef7475ea15c12cb0b5550 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: gunicorn app:app diff --git a/README.md b/README.md index c2f8410e22003c81e7719e8fb306913d2d89368d..8e65b17d14474bf09d19b076e83e78f890b8bf49 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,82 @@ ---- -title: Marketing Analytics Bot -emoji: 🦀 -colorFrom: purple -colorTo: purple -sdk: docker -pinned: false -license: openrail ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# 🦜️🔗 LangChain + +⚡ Building applications with LLMs through composability ⚡ + +[![lint](https://github.com/hwchase17/langchain/actions/workflows/lint.yml/badge.svg)](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [![test](https://github.com/hwchase17/langchain/actions/workflows/test.yml/badge.svg)](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [![linkcheck](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI)](https://twitter.com/langchainai) [![](https://dcbadge.vercel.app/api/server/6adMQxSpJS?compact=true&style=flat)](https://discord.gg/6adMQxSpJS) + +**Production Support:** As you move your LangChains into production, we'd love to offer more comprehensive support. +Please fill out [this form](https://forms.gle/57d8AmXBYp8PP8tZA) and we'll set up a dedicated support Slack channel. + +## Quick Install + +`pip install langchain` + +## 🤔 What is this? + +Large language models (LLMs) are emerging as a transformative technology, enabling +developers to build applications that they previously could not. +But using these LLMs in isolation is often not enough to +create a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge. + +This library is aimed at assisting in the development of those types of applications. 
Common examples of these types of applications include: + +**❓ Question Answering over specific documents** + +- [Documentation](https://langchain.readthedocs.io/en/latest/use_cases/question_answering.html) +- End-to-end Example: [Question Answering over Notion Database](https://github.com/hwchase17/notion-qa) + +**💬 Chatbots** + +- [Documentation](https://langchain.readthedocs.io/en/latest/use_cases/chatbots.html) +- End-to-end Example: [Chat-LangChain](https://github.com/hwchase17/chat-langchain) + +**🤖 Agents** + +- [Documentation](https://langchain.readthedocs.io/en/latest/use_cases/agents.html) +- End-to-end Example: [GPT+WolframAlpha](https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain) + +## 📖 Documentation + +Please see [here](https://langchain.readthedocs.io/en/latest/?) for full documentation on: + +- Getting started (installation, setting up the environment, simple examples) +- How-To examples (demos, integrations, helper functions) +- Reference (full API docs) +- Resources (high-level explanation of core concepts) + +## 🚀 What can this help with? + +There are six main areas that LangChain is designed to help with. +These are, in increasing order of complexity: + +**📃 LLMs and Prompts:** + +This includes prompt management, prompt optimization, generic interface for all LLMs, and common utilities for working with LLMs. + +**🔗 Chains:** + +Chains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications. + +**📚 Data Augmented Generation:** + +Data Augmented Generation involves specific types of chains that first interact with an external datasource to fetch data to use in the generation step. Examples of this include summarization of long pieces of text and question/answering over specific data sources. + +**🤖 Agents:** + +Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents. + +**🧠 Memory:** + +Memory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory. + +**🧐 Evaluation:** + +[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this. + +For more information on these concepts, please see our [full documentation](https://langchain.readthedocs.io/en/latest/?). + +## 💁 Contributing + +As an open source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infra, or better documentation. + +For detailed information on how to contribute, see [here](.github/CONTRIBUTING.md). 
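To make the Quick Install and the "LLMs and Prompts" / "Chains" concepts above concrete, here is a minimal sketch of a prompt-plus-LLM chain. It uses only classes this package exports (`OpenAI`, `PromptTemplate`, `LLMChain`); the temperature, prompt text, and example input are illustrative, and an `OPENAI_API_KEY` environment variable is assumed.

```python
# Minimal prompt + LLM chain, assuming `pip install langchain openai` and OPENAI_API_KEY set.
from langchain import LLMChain, OpenAI, PromptTemplate

llm = OpenAI(temperature=0.9)  # completion-style LLM wrapper exported by this package
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)
chain = LLMChain(llm=llm, prompt=prompt)  # formats the prompt, then calls the LLM
print(chain.run("colorful socks"))  # a bare string works because there is a single input variable
```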
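The rest of this diff builds a SQL-answering agent on top of the same primitives. As a point of reference, a hedged sketch of the stock SQL agent that this tree also ships (`SQLDatabaseToolkit` plus `create_sql_agent`) looks like the following; the connection string and question are placeholders.

```python
# Sketch of the built-in SQL agent; the DSN and question below are placeholders.
from langchain import OpenAI, SQLDatabase
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit

db = SQLDatabase.from_uri("postgresql+psycopg2://user:password@host:5432/dbname")
toolkit = SQLDatabaseToolkit(db=db)
agent = create_sql_agent(llm=OpenAI(temperature=0), toolkit=toolkit, verbose=True)
agent.run("How many visitor profiles are from the United States?")
```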
diff --git a/__pycache__/main.cpython-39.pyc b/__pycache__/main.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..211604739b85118492b7c16f321ff0c88d5d9b99 Binary files /dev/null and b/__pycache__/main.cpython-39.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..6de68b3286a738102f8b9648cab1e713d16a5c67 --- /dev/null +++ b/app.py @@ -0,0 +1,102 @@ +"""Python file to serve as the frontend""" +from datetime import datetime + +import wandb + +from langchain.agents.agent_toolkits.sql.simple_sql import create_simple_sql_agent_excutor +from langchain.callbacks import WandbCallbackHandler, CallbackManager, StdOutCallbackHandler +from langchain.document_loaders import WebBaseLoader +from langchain.embeddings import OpenAIEmbeddings +# import faiss +from langchain import OpenAI, FAISS, LLMChain +from langchain.chains import VectorDBQAWithSourcesChain +import pickle + +# root_dir = "/Users/jiefeng/Dropbox/Apps/admixer/neon_scrapy/data/" +# index_path = "".join([root_dir, "docs.index"]) +# fass_store_path = "".join([root_dir, "faiss_store.pkl"]) +# Load the LangChain. + +from langchain.prompts import PromptTemplate +import os +from langchain import OpenAI, VectorDBQA +from flask import Flask, request, jsonify +from flask_cors import CORS, cross_origin +from langchain.agents.agent_toolkits.sql.toolkit import SimpleSQLDatabaseToolkit +from langchain.sql_database import SQLDatabase +from langchain.llms.openai import OpenAI + +# create your SocketIO instance +# handle chat messages + + +url = "https://langchain.readthedocs.io/en/latest/" +# OPENAI_API_KEY (and WANDB_API_KEY, if the wandb callback is enabled) must be set in the +# environment before starting the app; credentials should never be hard-coded in source. +qa = None + +db = SQLDatabase.from_uri( + os.environ["DATABASE_URL"])  # Postgres connection string from the environment (DATABASE_URL is an assumed variable name) +toolkit = SimpleSQLDatabaseToolkit(db=db) + +session_group = datetime.now().strftime("%m.%d.%Y_%H.%M.%S") +# wandb_callback = WandbCallbackHandler( +# job_type="inference", +# project="marketing_questions", +# group=f"minimal_{session_group}", +# name="llm", +# tags=["test"], +# ) +manager = CallbackManager([StdOutCallbackHandler()]) + + +llm = OpenAI(temperature=0, + model_name="gpt-4", + callback_manager=manager, + verbose=True, + ) + +agent_executor = create_simple_sql_agent_excutor( + llm=llm, + toolkit=toolkit, + callback_manager=manager, + verbose=True +) + +# agent_executor.run("What are the most popular pages visited by our visitors?") + +# agent_executor.run("how many visitor profiles are from the United States?") +# From here down is all the Flask app. + + +app = Flask(__name__) +cors = CORS(app) + +@app.route('/') +@cross_origin() +def hello_world(): + return 'Hello, World!' 
+ + +@app.route('/api/ask', methods=['POST']) +@cross_origin() +def submit(): + print("request received") + data = request.get_json() + question = data['question'] + sql_data_result = None + if question: + print(question) + sql_data_result = agent_executor.run(question) + #wandb_callback.flush_tracker(agent_executor, reset=False, finish=True) + + # chartPrompt = PromptTemplate( + # template="What chart is best for the data {data}?", input_variables=["data"]) + # + # chartChain = LLMChain(llm=llm, prompt=chartPrompt) + # chartChain.run(sql_data_result) + result = jsonify(sql_data_result) + return result + + +if __name__ == '__main__': + app.run(port=7860) diff --git a/langchain/__init__.py b/langchain/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..86bf4f7ac0bd44e147ab83b0d29dddfd1596df8b --- /dev/null +++ b/langchain/__init__.py @@ -0,0 +1,109 @@ +"""Main entrypoint into package.""" + +from typing import Optional + +from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain +from langchain.cache import BaseCache +from langchain.callbacks import ( + set_default_callback_manager, + set_handler, + set_tracing_callback_manager, +) +from langchain.chains import ( + ConversationChain, + LLMBashChain, + LLMChain, + LLMCheckerChain, + LLMMathChain, + PALChain, + QAWithSourcesChain, + SQLDatabaseChain, + VectorDBQA, + VectorDBQAWithSourcesChain, +) +from langchain.docstore import InMemoryDocstore, Wikipedia +from langchain.llms import ( + Anthropic, + Banana, + CerebriumAI, + Cohere, + ForefrontAI, + GooseAI, + HuggingFaceHub, + Modal, + OpenAI, + Petals, + SagemakerEndpoint, + StochasticAI, + Writer, +) +from langchain.llms.huggingface_pipeline import HuggingFacePipeline +from langchain.prompts import ( + BasePromptTemplate, + FewShotPromptTemplate, + Prompt, + PromptTemplate, +) +from langchain.sql_database import SQLDatabase +from langchain.utilities.google_search import GoogleSearchAPIWrapper +from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.searx_search import SearxSearchWrapper +from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper +from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper +from langchain.vectorstores import FAISS, ElasticVectorSearch + +verbose: bool = False +llm_cache: Optional[BaseCache] = None +set_default_callback_manager() + +# For backwards compatibility +SerpAPIChain = SerpAPIWrapper + +__all__ = [ + "LLMChain", + "LLMBashChain", + "LLMCheckerChain", + "LLMMathChain", + "SelfAskWithSearchChain", + "SerpAPIWrapper", + "SerpAPIChain", + "SearxSearchWrapper", + "GoogleSearchAPIWrapper", + "GoogleSerperAPIWrapper", + "WolframAlphaAPIWrapper", + "WikipediaAPIWrapper", + "Anthropic", + "Banana", + "CerebriumAI", + "Cohere", + "ForefrontAI", + "GooseAI", + "Modal", + "OpenAI", + "Petals", + "StochasticAI", + "Writer", + "BasePromptTemplate", + "Prompt", + "FewShotPromptTemplate", + "PromptTemplate", + "ReActChain", + "Wikipedia", + "HuggingFaceHub", + "SagemakerEndpoint", + "HuggingFacePipeline", + "SQLDatabase", + "SQLDatabaseChain", + "FAISS", + "MRKLChain", + "VectorDBQA", + "ElasticVectorSearch", + "InMemoryDocstore", + "ConversationChain", + "VectorDBQAWithSourcesChain", + "QAWithSourcesChain", + "PALChain", + "set_handler", + "set_tracing_callback_manager", +] diff --git a/langchain/__pycache__/__init__.cpython-39.pyc b/langchain/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..d8d9dee4be590d40277bddca6066e7eb82cef474 Binary files /dev/null and b/langchain/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/__pycache__/cache.cpython-39.pyc b/langchain/__pycache__/cache.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..876339b18f33c344807d7757110aba705fba9dbe Binary files /dev/null and b/langchain/__pycache__/cache.cpython-39.pyc differ diff --git a/langchain/__pycache__/formatting.cpython-39.pyc b/langchain/__pycache__/formatting.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da4a0f3e1b92eb83fa899d856109427d5600d17f Binary files /dev/null and b/langchain/__pycache__/formatting.cpython-39.pyc differ diff --git a/langchain/__pycache__/input.cpython-39.pyc b/langchain/__pycache__/input.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be07bc2ded109ef2bc66afb243497d7d387ba665 Binary files /dev/null and b/langchain/__pycache__/input.cpython-39.pyc differ diff --git a/langchain/__pycache__/python.cpython-39.pyc b/langchain/__pycache__/python.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93cd2520edfd1ed27f8f38e555a64b93c2fa220b Binary files /dev/null and b/langchain/__pycache__/python.cpython-39.pyc differ diff --git a/langchain/__pycache__/requests.cpython-39.pyc b/langchain/__pycache__/requests.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c497731c861e865b5adfea540536c1c370b55e84 Binary files /dev/null and b/langchain/__pycache__/requests.cpython-39.pyc differ diff --git a/langchain/__pycache__/schema.cpython-39.pyc b/langchain/__pycache__/schema.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6625443bd913f7171fa0ff24908c734315fac94e Binary files /dev/null and b/langchain/__pycache__/schema.cpython-39.pyc differ diff --git a/langchain/__pycache__/sql_database.cpython-39.pyc b/langchain/__pycache__/sql_database.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..408d12eb7ddffd9cea71b9d71e76d5b02fc7fe15 Binary files /dev/null and b/langchain/__pycache__/sql_database.cpython-39.pyc differ diff --git a/langchain/__pycache__/text_splitter.cpython-39.pyc b/langchain/__pycache__/text_splitter.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b9b958724ebedfd857f41646ecd43379533ced1 Binary files /dev/null and b/langchain/__pycache__/text_splitter.cpython-39.pyc differ diff --git a/langchain/__pycache__/utils.cpython-39.pyc b/langchain/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2b19568b7c6509582d3ab8e8e9210f110b3c9e8 Binary files /dev/null and b/langchain/__pycache__/utils.cpython-39.pyc differ diff --git a/langchain/agents/__init__.py b/langchain/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fa099db42805de6048d8817d866e380cf7be3c88 --- /dev/null +++ b/langchain/agents/__init__.py @@ -0,0 +1,43 @@ +"""Interface for agents.""" +from langchain.agents.agent import Agent, AgentExecutor +from langchain.agents.agent_toolkits import ( + create_csv_agent, + create_json_agent, + create_openapi_agent, + create_pandas_dataframe_agent, + create_sql_agent, + create_vectorstore_agent, + create_vectorstore_router_agent, +) +from langchain.agents.conversational.base import ConversationalAgent +from langchain.agents.initialize import initialize_agent +from 
langchain.agents.load_tools import get_all_tool_names, load_tools +from langchain.agents.loading import load_agent +from langchain.agents.mrkl.base import MRKLChain, ZeroShotAgent +from langchain.agents.react.base import ReActChain, ReActTextWorldAgent +from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain +from langchain.agents.tools import Tool, tool + +__all__ = [ + "MRKLChain", + "SelfAskWithSearchChain", + "ReActChain", + "AgentExecutor", + "Agent", + "Tool", + "tool", + "initialize_agent", + "ZeroShotAgent", + "ReActTextWorldAgent", + "load_tools", + "get_all_tool_names", + "ConversationalAgent", + "load_agent", + "create_sql_agent", + "create_json_agent", + "create_openapi_agent", + "create_vectorstore_router_agent", + "create_vectorstore_agent", + "create_pandas_dataframe_agent", + "create_csv_agent", +] diff --git a/langchain/agents/__pycache__/__init__.cpython-39.pyc b/langchain/agents/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46149e2eb29d4d2b553c6571fd516dbb8a6ce82a Binary files /dev/null and b/langchain/agents/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/__pycache__/agent.cpython-39.pyc b/langchain/agents/__pycache__/agent.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..929a2c94cac4628b26a3f9855ccb50763dede723 Binary files /dev/null and b/langchain/agents/__pycache__/agent.cpython-39.pyc differ diff --git a/langchain/agents/__pycache__/initialize.cpython-39.pyc b/langchain/agents/__pycache__/initialize.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30bd8c5abf4352429265901d81f496e291b4cff1 Binary files /dev/null and b/langchain/agents/__pycache__/initialize.cpython-39.pyc differ diff --git a/langchain/agents/__pycache__/load_tools.cpython-39.pyc b/langchain/agents/__pycache__/load_tools.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8db5e50f0a523333ab04a5423ef5069fb8ca1429 Binary files /dev/null and b/langchain/agents/__pycache__/load_tools.cpython-39.pyc differ diff --git a/langchain/agents/__pycache__/loading.cpython-39.pyc b/langchain/agents/__pycache__/loading.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a12f4c2d54087e79c6d1321ebd1402ec0d2bdceb Binary files /dev/null and b/langchain/agents/__pycache__/loading.cpython-39.pyc differ diff --git a/langchain/agents/__pycache__/tools.cpython-39.pyc b/langchain/agents/__pycache__/tools.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfeb911ed518c1452c9cf85ba10ed207579504a5 Binary files /dev/null and b/langchain/agents/__pycache__/tools.cpython-39.pyc differ diff --git a/langchain/agents/agent.py b/langchain/agents/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..2958e684f94d2644346d0b62f7d21281e82e9f00 --- /dev/null +++ b/langchain/agents/agent.py @@ -0,0 +1,583 @@ +"""Chain that takes in an input and produces an action and action input.""" +from __future__ import annotations + +import json +import logging +from abc import abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import yaml +from pydantic import BaseModel, root_validator + +from langchain.agents.tools import InvalidTool +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.input 
import get_color_mapping +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import AgentAction, AgentFinish, BaseMessage, AgentClarify +from langchain.tools.base import BaseTool + +logger = logging.getLogger() + + +class Agent(BaseModel): + """Class responsible for calling the language model and deciding the action. + + This is driven by an LLMChain. The prompt in the LLMChain MUST include + a variable called "agent_scratchpad" where the agent can put its + intermediary work. + """ + + llm_chain: LLMChain + allowed_tools: Optional[List[str]] = None + return_values: List[str] = ["output"] + + @abstractmethod + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: + """Extract tool and tool input from llm output.""" + + def _fix_text(self, text: str) -> str: + """Fix the text.""" + raise ValueError("fix_text not implemented for this agent.") + + @property + def _stop(self) -> List[str]: + return [ + f"\n{self.observation_prefix.rstrip()}", + f"\n\t{self.observation_prefix.rstrip()}", + ] + + def _construct_scratchpad( + self, intermediate_steps: List[Tuple[AgentAction, str]] + ) -> Union[str, List[BaseMessage]]: + """Construct the scratchpad that lets the agent continue its thought process.""" + thoughts = "" + for action, observation in intermediate_steps: + thoughts += action.log + thoughts += f"\n{self.observation_prefix}{observation}\n{self.llm_prefix}" + return thoughts + + def _get_next_action(self, full_inputs: Dict[str, str]) -> AgentAction: + full_output = self.llm_chain.predict(**full_inputs) + parsed_output = self._extract_tool_and_input(full_output) + while parsed_output is None: + full_output = self._fix_text(full_output) + full_inputs["agent_scratchpad"] += full_output + output = self.llm_chain.predict(**full_inputs) + full_output += output + parsed_output = self._extract_tool_and_input(full_output) + return AgentAction( + tool=parsed_output[0], tool_input=parsed_output[1], log=full_output + ) + + async def _aget_next_action(self, full_inputs: Dict[str, str]) -> AgentAction: + full_output = await self.llm_chain.apredict(**full_inputs) + parsed_output = self._extract_tool_and_input(full_output) + while parsed_output is None: + full_output = self._fix_text(full_output) + full_inputs["agent_scratchpad"] += full_output + output = await self.llm_chain.apredict(**full_inputs) + full_output += output + parsed_output = self._extract_tool_and_input(full_output) + return AgentAction( + tool=parsed_output[0], tool_input=parsed_output[1], log=full_output + ) + + def plan( + self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any + ) -> Union[AgentAction, AgentFinish, AgentClarify]: + """Given input, decided what to do. + + Args: + intermediate_steps: Steps the LLM has taken to date, + along with observations + **kwargs: User inputs. + + Returns: + Action specifying what tool to use. + """ + full_inputs = self.get_full_inputs(intermediate_steps, **kwargs) + action = self._get_next_action(full_inputs) + if action.tool == self.finish_tool_name: + return AgentFinish({"output": action.tool_input}, action.log) + return action + + async def aplan( + self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any + ) -> Union[AgentAction, AgentFinish]: + """Given input, decided what to do. 
+ + Args: + intermediate_steps: Steps the LLM has taken to date, + along with observations + **kwargs: User inputs. + + Returns: + Action specifying what tool to use. + """ + full_inputs = self.get_full_inputs(intermediate_steps, **kwargs) + action = await self._aget_next_action(full_inputs) + if action.tool == self.finish_tool_name: + return AgentFinish({"output": action.tool_input}, action.log) + return action + + def get_full_inputs( + self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any + ) -> Dict[str, Any]: + """Create the full inputs for the LLMChain from intermediate steps.""" + thoughts = self._construct_scratchpad(intermediate_steps) + new_inputs = {"agent_scratchpad": thoughts, "stop": self._stop} + full_inputs = {**kwargs, **new_inputs} + return full_inputs + + def prepare_for_new_call(self) -> None: + """Prepare the agent for new call, if needed.""" + pass + + @property + def finish_tool_name(self) -> str: + """Name of the tool to use to finish the chain.""" + return "Final Answer" + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return list(set(self.llm_chain.input_keys) - {"agent_scratchpad"}) + + @root_validator() + def validate_prompt(cls, values: Dict) -> Dict: + """Validate that prompt matches format.""" + prompt = values["llm_chain"].prompt + if "agent_scratchpad" not in prompt.input_variables: + logger.warning( + "`agent_scratchpad` should be a variable in prompt.input_variables." + " Did not find it, so adding it at the end." + ) + prompt.input_variables.append("agent_scratchpad") + if isinstance(prompt, PromptTemplate): + prompt.template += "\n{agent_scratchpad}" + elif isinstance(prompt, FewShotPromptTemplate): + prompt.suffix += "\n{agent_scratchpad}" + else: + raise ValueError(f"Got unexpected prompt type {type(prompt)}") + return values + + @property + @abstractmethod + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + + @property + @abstractmethod + def llm_prefix(self) -> str: + """Prefix to append the LLM call with.""" + + @classmethod + @abstractmethod + def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate: + """Create a prompt for this class.""" + + @classmethod + def _validate_tools(cls, tools: Sequence[BaseTool]) -> None: + """Validate that appropriate tools are passed in.""" + pass + + @classmethod + def from_llm_and_tools( + cls, + llm: BaseLLM, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, + ) -> Agent: + """Construct an agent from an LLM and tools.""" + cls._validate_tools(tools) + llm_chain = LLMChain( + llm=llm, + prompt=cls.create_prompt(tools), + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + + def return_stopped_response( + self, + early_stopping_method: str, + intermediate_steps: List[Tuple[AgentAction, str]], + **kwargs: Any, + ) -> AgentFinish: + """Return response when agent has been stopped due to max iterations.""" + if early_stopping_method == "force": + # `force` just returns a constant string + return AgentFinish({"output": "Agent stopped due to max iterations."}, "") + elif early_stopping_method == "generate": + # Generate does one final forward pass + thoughts = "" + for action, observation in intermediate_steps: + thoughts += action.log + thoughts += ( + f"\n{self.observation_prefix}{observation}\n{self.llm_prefix}" + ) + # Adding to the 
previous steps, we now tell the LLM to make a final pred + thoughts += ( + "\n\nI now need to return a final answer based on the previous steps:" + ) + new_inputs = {"agent_scratchpad": thoughts, "stop": self._stop} + full_inputs = {**kwargs, **new_inputs} + full_output = self.llm_chain.predict(**full_inputs) + # We try to extract a final answer + parsed_output = self._extract_tool_and_input(full_output) + if parsed_output is None: + # If we cannot extract, we just return the full output + return AgentFinish({"output": full_output}, full_output) + tool, tool_input = parsed_output + if tool == self.finish_tool_name: + # If we can extract, we send the correct stuff + return AgentFinish({"output": tool_input}, full_output) + else: + # If we can extract, but the tool is not the final tool, + # we just return the full output + return AgentFinish({"output": full_output}, full_output) + else: + raise ValueError( + "early_stopping_method should be one of `force` or `generate`, " + f"got {early_stopping_method}" + ) + + @property + @abstractmethod + def _agent_type(self) -> str: + """Return Identifier of agent type.""" + + def dict(self, **kwargs: Any) -> Dict: + """Return dictionary representation of agent.""" + _dict = super().dict() + _dict["_type"] = self._agent_type + return _dict + + def save(self, file_path: Union[Path, str]) -> None: + """Save the agent. + + Args: + file_path: Path to file to save the agent to. + + Example: + .. code-block:: python + + # If working with agent executor + agent.agent.save(file_path="path/agent.yaml") + """ + # Convert file to Path object. + if isinstance(file_path, str): + save_path = Path(file_path) + else: + save_path = file_path + + directory_path = save_path.parent + directory_path.mkdir(parents=True, exist_ok=True) + + # Fetch dictionary to save + agent_dict = self.dict() + + if save_path.suffix == ".json": + with open(file_path, "w") as f: + json.dump(agent_dict, f, indent=4) + elif save_path.suffix == ".yaml": + with open(file_path, "w") as f: + yaml.dump(agent_dict, f, default_flow_style=False) + else: + raise ValueError(f"{save_path} must be json or yaml") + + +class AgentExecutor(Chain, BaseModel): + """Consists of an agent using tools.""" + + agent: Agent + tools: Sequence[BaseTool] + return_intermediate_steps: bool = False + max_iterations: Optional[int] = 15 + early_stopping_method: str = "force" + + @classmethod + def from_agent_and_tools( + cls, + agent: Agent, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, + ) -> AgentExecutor: + """Create from agent and tools.""" + return cls( + agent=agent, tools=tools, callback_manager=callback_manager, **kwargs + ) + + @root_validator() + def validate_tools(cls, values: Dict) -> Dict: + """Validate that tools are compatible with agent.""" + agent = values["agent"] + tools = values["tools"] + if agent.allowed_tools is not None: + if set(agent.allowed_tools) != set([tool.name for tool in tools]): + raise ValueError( + f"Allowed tools ({agent.allowed_tools}) different than " + f"provided tools ({[tool.name for tool in tools]})" + ) + return values + + def save(self, file_path: Union[Path, str]) -> None: + """Raise error - saving not supported for Agent Executors.""" + raise ValueError( + "Saving not supported for agent executors. 
" + "If you are trying to save the agent, please use the " + "`.save_agent(...)`" + ) + + def save_agent(self, file_path: Union[Path, str]) -> None: + """Save the underlying agent.""" + return self.agent.save(file_path) + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return self.agent.input_keys + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. + + :meta private: + """ + if self.return_intermediate_steps: + return self.agent.return_values + ["intermediate_steps"] + else: + return self.agent.return_values + + def _should_continue(self, iterations: int) -> bool: + if self.max_iterations is None: + return True + else: + return iterations < self.max_iterations + + def _return(self, output: AgentFinish, intermediate_steps: list) -> Dict[str, Any]: + self.callback_manager.on_agent_finish( + output, color="green", verbose=self.verbose + ) + final_output = output.return_values + if self.return_intermediate_steps: + final_output["intermediate_steps"] = intermediate_steps + return final_output + + def _handle_clarify(self, output: AgentClarify, intermediate_steps: list) -> Dict[str, Any]: + self.callback_manager.on_agent_clarify( + output, color="yellow", verbose=self.verbose + ) + final_output = {"clarify_question": output.question} + if self.return_intermediate_steps: + final_output["intermediate_steps"] = intermediate_steps + return final_output + + + async def _areturn( + self, output: AgentFinish, intermediate_steps: list + ) -> Dict[str, Any]: + if self.callback_manager.is_async: + await self.callback_manager.on_agent_finish( + output, color="green", verbose=self.verbose + ) + else: + self.callback_manager.on_agent_finish( + output, color="green", verbose=self.verbose + ) + final_output = output.return_values + if self.return_intermediate_steps: + final_output["intermediate_steps"] = intermediate_steps + return final_output + + def _take_next_step( + self, + name_to_tool_map: Dict[str, BaseTool], + color_mapping: Dict[str, str], + inputs: Dict[str, str], + intermediate_steps: List[Tuple[AgentAction, str]], + ) -> Union[AgentFinish, Tuple[AgentAction, str], Tuple[AgentClarify, str]]: + """Take a single step in the thought-action-observation loop. + + Override this to take control of how the agent makes and acts on choices. + """ + # Call the LLM to see what to do. + output = self.agent.plan(intermediate_steps, **inputs) + # If the tool chosen is the finishing tool, then we end and return. 
+ if isinstance(output, AgentFinish): + return output + if isinstance(output, AgentClarify): + return output + self.callback_manager.on_agent_action( + output, verbose=self.verbose, color="green" + ) + # Otherwise we lookup the tool + if output.tool in name_to_tool_map: + tool = name_to_tool_map[output.tool] + return_direct = tool.return_direct + color = color_mapping[output.tool] + llm_prefix = "" if return_direct else self.agent.llm_prefix + # We then call the tool on the tool input to get an observation + observation = tool.run( + output.tool_input, + verbose=self.verbose, + color=color, + llm_prefix=llm_prefix, + observation_prefix=self.agent.observation_prefix, + ) + else: + observation = InvalidTool().run( + output.tool, + verbose=self.verbose, + color=None, + llm_prefix="", + observation_prefix=self.agent.observation_prefix, + ) + return output, observation + + async def _atake_next_step( + self, + name_to_tool_map: Dict[str, BaseTool], + color_mapping: Dict[str, str], + inputs: Dict[str, str], + intermediate_steps: List[Tuple[AgentAction, str]], + ) -> Union[AgentFinish, Tuple[AgentAction, str]]: + """Take a single step in the thought-action-observation loop. + + Override this to take control of how the agent makes and acts on choices. + """ + # Call the LLM to see what to do. + output = await self.agent.aplan(intermediate_steps, **inputs) + # If the tool chosen is the finishing tool, then we end and return. + if isinstance(output, AgentFinish): + return output + if self.callback_manager.is_async: + await self.callback_manager.on_agent_action( + output, verbose=self.verbose, color="green" + ) + else: + self.callback_manager.on_agent_action( + output, verbose=self.verbose, color="green" + ) + + # Otherwise we lookup the tool + if output.tool in name_to_tool_map: + tool = name_to_tool_map[output.tool] + return_direct = tool.return_direct + color = color_mapping[output.tool] + llm_prefix = "" if return_direct else self.agent.llm_prefix + # We then call the tool on the tool input to get an observation + observation = await tool.arun( + output.tool_input, + verbose=self.verbose, + color=color, + llm_prefix=llm_prefix, + observation_prefix=self.agent.observation_prefix, + ) + else: + observation = await InvalidTool().arun( + output.tool, + verbose=self.verbose, + color=None, + llm_prefix="", + observation_prefix=self.agent.observation_prefix, + ) + return_direct = False + return output, observation + + def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]: + """Run text through and get agent response.""" + # Do any preparation necessary when receiving a new input. + self.agent.prepare_for_new_call() + # Construct a mapping of tool name to tool for easy lookup + name_to_tool_map = {tool.name: tool for tool in self.tools} + # We construct a mapping from each tool to a color, used for logging. + color_mapping = get_color_mapping( + [tool.name for tool in self.tools], excluded_colors=["green"] + ) + intermediate_steps: List[Tuple[AgentAction, str]] = [] + # Let's start tracking the iterations the agent has gone through + iterations = 0 + # We now enter the agent loop (until it returns something). 
+ while self._should_continue(iterations): + next_step_output = self._take_next_step( + name_to_tool_map, color_mapping, inputs, intermediate_steps + ) + if isinstance(next_step_output, AgentFinish): + return self._return(next_step_output, intermediate_steps) + + if isinstance(next_step_output, AgentClarify): + return self._handle_clarify(next_step_output, intermediate_steps) + + intermediate_steps.append(next_step_output) + # See if tool should return directly + tool_return = self._get_tool_return(next_step_output) + if tool_return is not None: + return self._return(tool_return, intermediate_steps) + iterations += 1 + output = self.agent.return_stopped_response( + self.early_stopping_method, intermediate_steps, **inputs + ) + return self._return(output, intermediate_steps) + + async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]: + """Run text through and get agent response.""" + # Do any preparation necessary when receiving a new input. + self.agent.prepare_for_new_call() + # Construct a mapping of tool name to tool for easy lookup + name_to_tool_map = {tool.name: tool for tool in self.tools} + # We construct a mapping from each tool to a color, used for logging. + color_mapping = get_color_mapping( + [tool.name for tool in self.tools], excluded_colors=["green"] + ) + intermediate_steps: List[Tuple[AgentAction, str]] = [] + # Let's start tracking the iterations the agent has gone through + iterations = 0 + # We now enter the agent loop (until it returns something). + while self._should_continue(iterations): + next_step_output = await self._atake_next_step( + name_to_tool_map, color_mapping, inputs, intermediate_steps + ) + if isinstance(next_step_output, AgentFinish): + return await self._areturn(next_step_output, intermediate_steps) + + intermediate_steps.append(next_step_output) + # See if tool should return directly + tool_return = self._get_tool_return(next_step_output) + if tool_return is not None: + return await self._areturn(tool_return, intermediate_steps) + + iterations += 1 + output = self.agent.return_stopped_response( + self.early_stopping_method, intermediate_steps, **inputs + ) + return await self._areturn(output, intermediate_steps) + + def _get_tool_return( + self, next_step_output: Tuple[AgentAction, str] + ) -> Optional[AgentFinish]: + """Check if the tool is a returning tool.""" + agent_action, observation = next_step_output + name_to_tool_map = {tool.name: tool for tool in self.tools} + # Invalid tools won't be in the map, so we return False. 
+ if agent_action.tool in name_to_tool_map: + if name_to_tool_map[agent_action.tool].return_direct: + return AgentFinish( + {self.agent.return_values[0]: observation}, + "", + ) + return None diff --git a/langchain/agents/agent_toolkits/__init__.py b/langchain/agents/agent_toolkits/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0b65e01d24df58c704cf422140098225d9c11c9b --- /dev/null +++ b/langchain/agents/agent_toolkits/__init__.py @@ -0,0 +1,39 @@ +"""Agent toolkits.""" + +from langchain.agents.agent_toolkits.csv.base import create_csv_agent +from langchain.agents.agent_toolkits.json.base import create_json_agent +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent +from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit +from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent +from langchain.agents.agent_toolkits.python.base import create_python_agent +from langchain.agents.agent_toolkits.sql.base import create_sql_agent +from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain.agents.agent_toolkits.vectorstore.base import ( + create_vectorstore_agent, + create_vectorstore_router_agent, +) +from langchain.agents.agent_toolkits.vectorstore.toolkit import ( + VectorStoreInfo, + VectorStoreRouterToolkit, + VectorStoreToolkit, +) +from langchain.agents.agent_toolkits.zapier.toolkit import ZapierToolkit + +__all__ = [ + "create_json_agent", + "create_sql_agent", + "create_openapi_agent", + "create_python_agent", + "create_vectorstore_agent", + "JsonToolkit", + "SQLDatabaseToolkit", + "OpenAPIToolkit", + "VectorStoreToolkit", + "create_vectorstore_router_agent", + "VectorStoreInfo", + "VectorStoreRouterToolkit", + "create_pandas_dataframe_agent", + "create_csv_agent", + "ZapierToolkit", +] diff --git a/langchain/agents/agent_toolkits/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db502748057710bd2e073f727ac82fe1e6f414cc Binary files /dev/null and b/langchain/agents/agent_toolkits/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3be99f008271b978f3bf6a9b921832adee8367f8 Binary files /dev/null and b/langchain/agents/agent_toolkits/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/base.py b/langchain/agents/agent_toolkits/base.py new file mode 100644 index 0000000000000000000000000000000000000000..ce9b9e43af9601ba2060f1219c0412474b72c14a --- /dev/null +++ b/langchain/agents/agent_toolkits/base.py @@ -0,0 +1,15 @@ +"""Toolkits for agents.""" +from abc import abstractmethod +from typing import List + +from pydantic import BaseModel + +from langchain.tools import BaseTool + + +class BaseToolkit(BaseModel): + """Class responsible for defining a collection of related tools.""" + + @abstractmethod + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" diff --git a/langchain/agents/agent_toolkits/csv/__init__.py b/langchain/agents/agent_toolkits/csv/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e3a1e069d1d9fda881c08680dc66c01f22e2809 --- /dev/null +++ 
b/langchain/agents/agent_toolkits/csv/__init__.py @@ -0,0 +1 @@ +"""CSV toolkit.""" diff --git a/langchain/agents/agent_toolkits/csv/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/csv/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a4a9cf1a2a44d6d5891f0d570a3fdb5256404be Binary files /dev/null and b/langchain/agents/agent_toolkits/csv/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/csv/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/csv/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..365b047090bb7516ff18e98971bdf358eacda363 Binary files /dev/null and b/langchain/agents/agent_toolkits/csv/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/csv/base.py b/langchain/agents/agent_toolkits/csv/base.py new file mode 100644 index 0000000000000000000000000000000000000000..9bac5436a34469b861d9e788cced736eae68258c --- /dev/null +++ b/langchain/agents/agent_toolkits/csv/base.py @@ -0,0 +1,17 @@ +"""Agent for working with csvs.""" +from typing import Any, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent +from langchain.llms.base import BaseLLM + + +def create_csv_agent( + llm: BaseLLM, path: str, pandas_kwargs: Optional[dict] = None, **kwargs: Any +) -> AgentExecutor: + """Create csv agent by loading to a dataframe and using pandas agent.""" + import pandas as pd + + _kwargs = pandas_kwargs or {} + df = pd.read_csv(path, **_kwargs) + return create_pandas_dataframe_agent(llm, df, **kwargs) diff --git a/langchain/agents/agent_toolkits/json/__init__.py b/langchain/agents/agent_toolkits/json/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfab0ec6f83a544f93a19dc036a188cb08b82433 --- /dev/null +++ b/langchain/agents/agent_toolkits/json/__init__.py @@ -0,0 +1 @@ +"""Json agent.""" diff --git a/langchain/agents/agent_toolkits/json/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/json/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c776fcd85e9d5e126f0041bc069ab29263c1b7e0 Binary files /dev/null and b/langchain/agents/agent_toolkits/json/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/json/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/json/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e51d43bb5076031d1d5ed65040447867c1294069 Binary files /dev/null and b/langchain/agents/agent_toolkits/json/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/json/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/json/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a65e50f901459d8b98c040183f6e901a1c2769e Binary files /dev/null and b/langchain/agents/agent_toolkits/json/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/json/__pycache__/toolkit.cpython-39.pyc b/langchain/agents/agent_toolkits/json/__pycache__/toolkit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7daea3783a9e8803ba468c6bf76bc2de257a92e Binary files /dev/null and b/langchain/agents/agent_toolkits/json/__pycache__/toolkit.cpython-39.pyc differ diff 
--git a/langchain/agents/agent_toolkits/json/base.py b/langchain/agents/agent_toolkits/json/base.py new file mode 100644 index 0000000000000000000000000000000000000000..48d3fd9cc05c803df88bfaa2f21312f591c0e3dc --- /dev/null +++ b/langchain/agents/agent_toolkits/json/base.py @@ -0,0 +1,43 @@ +"""Json agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_json_agent( + llm: BaseLLM, + toolkit: JsonToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = JSON_PREFIX, + suffix: str = JSON_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a json agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) diff --git a/langchain/agents/agent_toolkits/json/prompt.py b/langchain/agents/agent_toolkits/json/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..a3b7584aca222a88b2035af48657a7d00558b5e5 --- /dev/null +++ b/langchain/agents/agent_toolkits/json/prompt.py @@ -0,0 +1,25 @@ +# flake8: noqa + +JSON_PREFIX = """You are an agent designed to interact with JSON. +Your goal is to return a final answer by interacting with the JSON. +You have access to the following tools which help you learn more about the JSON you are interacting with. +Only use the below tools. Only use the information returned by the below tools to construct your final answer. +Do not make up any information that is not contained in the JSON. +Your input to the tools should be in the form of `data["key"][0]` where `data` is the JSON blob you are interacting with, and the syntax used is Python. +You should only use keys that you know for a fact exist. You must validate that a key exists by seeing it previously when calling `json_spec_list_keys`. +If you have not seen a key in one of those responses, you cannot use it. +You should only add one key at a time to the path. You cannot add multiple keys at once. +If you encounter a "KeyError", go back to the previous key, look at the available keys, and try again. + +If the question does not seem to be related to the JSON, just return "I don't know" as the answer. +Always begin your interaction with the `json_spec_list_keys` tool with input "data" to see what keys exist in the JSON. + +Note that sometimes the value at a given path is large. In this case, you will get an error "Value is a large dictionary, should explore its keys directly". +In this case, you should ALWAYS follow up by using the `json_spec_list_keys` tool to see what keys exist at that path. 
+Do not simply refer the user to the JSON or a section of the JSON, as this is not a valid answer. Keep digging until you find the answer and explicitly return it. +""" +JSON_SUFFIX = """Begin!" + +Question: {input} +Thought: I should look at the keys that exist in data to see what I have access to +{agent_scratchpad}""" diff --git a/langchain/agents/agent_toolkits/json/toolkit.py b/langchain/agents/agent_toolkits/json/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..a12cf290780a0c387bc4b789cc990ad7be069959 --- /dev/null +++ b/langchain/agents/agent_toolkits/json/toolkit.py @@ -0,0 +1,21 @@ +"""Toolkit for interacting with a JSON spec.""" +from __future__ import annotations + +from typing import List + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.tools import BaseTool +from langchain.tools.json.tool import JsonGetValueTool, JsonListKeysTool, JsonSpec + + +class JsonToolkit(BaseToolkit): + """Toolkit for interacting with a JSON spec.""" + + spec: JsonSpec + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + JsonListKeysTool(spec=self.spec), + JsonGetValueTool(spec=self.spec), + ] diff --git a/langchain/agents/agent_toolkits/openapi/__init__.py b/langchain/agents/agent_toolkits/openapi/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d06e271cd00be5bc75dd27398a5fff15de1afbf --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/__init__.py @@ -0,0 +1 @@ +"""OpenAPI spec agent.""" diff --git a/langchain/agents/agent_toolkits/openapi/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/openapi/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f539a15cb113f6548c9b47570752228479c4920 Binary files /dev/null and b/langchain/agents/agent_toolkits/openapi/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/openapi/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/openapi/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dec64ce418755629a0092c72447ec25f7ee6064 Binary files /dev/null and b/langchain/agents/agent_toolkits/openapi/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/openapi/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/openapi/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80ee24a5561e8112e7a9d05e053e207090e71de1 Binary files /dev/null and b/langchain/agents/agent_toolkits/openapi/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/openapi/__pycache__/toolkit.cpython-39.pyc b/langchain/agents/agent_toolkits/openapi/__pycache__/toolkit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12c259becc8509092e6fde721626658694038d22 Binary files /dev/null and b/langchain/agents/agent_toolkits/openapi/__pycache__/toolkit.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/openapi/base.py b/langchain/agents/agent_toolkits/openapi/base.py new file mode 100644 index 0000000000000000000000000000000000000000..602ee87fc387f538849a85ab220d2c12183ba24e --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/base.py @@ -0,0 +1,46 @@ +"""OpenAPI spec agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from 
langchain.agents.agent_toolkits.openapi.prompt import ( + OPENAPI_PREFIX, + OPENAPI_SUFFIX, +) +from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_openapi_agent( + llm: BaseLLM, + toolkit: OpenAPIToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = OPENAPI_PREFIX, + suffix: str = OPENAPI_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a json agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) diff --git a/langchain/agents/agent_toolkits/openapi/prompt.py b/langchain/agents/agent_toolkits/openapi/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..376c3d8cb1330cd1400a3094b6652406f55fb5fc --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/prompt.py @@ -0,0 +1,29 @@ +# flake8: noqa + +OPENAPI_PREFIX = """You are an agent designed to answer questions by making web requests to an API given the openapi spec. + +If the question does not seem related to the API, return I don't know. Do not make up an answer. +Only use information provided by the tools to construct your response. + +First, find the base URL needed to make the request. + +Second, find the relevant paths needed to answer the question. Take note that, sometimes, you might need to make more than one request to more than one path to answer the question. + +Third, find the required parameters needed to make the request. For GET requests, these are usually URL parameters and for POST requests, these are request body parameters. + +Fourth, make the requests needed to answer the question. Ensure that you are sending the correct parameters to the request by checking which parameters are required. For parameters with a fixed set of values, please use the spec to look at which values are allowed. + +Use the exact parameter names as listed in the spec, do not make up any names or abbreviate the names of parameters. +If you get a not found error, ensure that you are using a path that actually exists in the spec. +""" +OPENAPI_SUFFIX = """Begin!" + +Question: {input} +Thought: I should explore the spec to find the base url for the API. +{agent_scratchpad}""" + +DESCRIPTION = """Can be used to answer questions about the openapi spec for the API. Always use this tool before trying to make a request. +Example inputs to this tool: + 'What are the required query parameters for a GET request to the /bar endpoint?` + 'What are the required parameters in the request body for a POST request to the /foo endpoint?' 
+Always give this tool a specific question.""" diff --git a/langchain/agents/agent_toolkits/openapi/toolkit.py b/langchain/agents/agent_toolkits/openapi/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..25d486343e7830fdc77983fc01b9aba5f08f5142 --- /dev/null +++ b/langchain/agents/agent_toolkits/openapi/toolkit.py @@ -0,0 +1,67 @@ +"""Requests toolkit.""" +from __future__ import annotations + +from typing import Any, List + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.agents.agent_toolkits.json.base import create_json_agent +from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit +from langchain.agents.agent_toolkits.openapi.prompt import DESCRIPTION +from langchain.agents.tools import Tool +from langchain.llms.base import BaseLLM +from langchain.requests import RequestsWrapper +from langchain.tools import BaseTool +from langchain.tools.json.tool import JsonSpec +from langchain.tools.requests.tool import ( + RequestsDeleteTool, + RequestsGetTool, + RequestsPatchTool, + RequestsPostTool, + RequestsPutTool, +) + + +class RequestsToolkit(BaseToolkit): + """Toolkit for making requests.""" + + requests_wrapper: RequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Return a list of tools.""" + return [ + RequestsGetTool(requests_wrapper=self.requests_wrapper), + RequestsPostTool(requests_wrapper=self.requests_wrapper), + RequestsPatchTool(requests_wrapper=self.requests_wrapper), + RequestsPutTool(requests_wrapper=self.requests_wrapper), + RequestsDeleteTool(requests_wrapper=self.requests_wrapper), + ] + + +class OpenAPIToolkit(BaseToolkit): + """Toolkit for interacting with a OpenAPI api.""" + + json_agent: AgentExecutor + requests_wrapper: RequestsWrapper + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + json_agent_tool = Tool( + name="json_explorer", + func=self.json_agent.run, + description=DESCRIPTION, + ) + request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper) + return [*request_toolkit.get_tools(), json_agent_tool] + + @classmethod + def from_llm( + cls, + llm: BaseLLM, + json_spec: JsonSpec, + requests_wrapper: RequestsWrapper, + **kwargs: Any, + ) -> OpenAPIToolkit: + """Create json agent from llm, then initialize.""" + json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs) + return cls(json_agent=json_agent, requests_wrapper=requests_wrapper) diff --git a/langchain/agents/agent_toolkits/pandas/__init__.py b/langchain/agents/agent_toolkits/pandas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6dc608d470e76b6ff07b433bf7dbe8d205847ba --- /dev/null +++ b/langchain/agents/agent_toolkits/pandas/__init__.py @@ -0,0 +1 @@ +"""Pandas toolkit.""" diff --git a/langchain/agents/agent_toolkits/pandas/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/pandas/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eae384aa917dd61ce66b7da008fcc2026dce3be5 Binary files /dev/null and b/langchain/agents/agent_toolkits/pandas/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/pandas/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/pandas/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f0657bfa71a8430e2dac7a676257320300f1dc0 Binary files /dev/null and 
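For orientation, a minimal usage sketch of the OpenAPI agent pieces added above (create_openapi_agent, OpenAPIToolkit, JsonSpec, RequestsWrapper). The spec file name, the question, and the assumption that an OPENAI_API_KEY is set in the environment are illustrative placeholders, not part of this changeset.

import yaml

from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.llms.openai import OpenAI
from langchain.requests import RequestsWrapper
from langchain.tools.json.tool import JsonSpec

# Hypothetical spec file; any OpenAPI document loaded into a dict works here.
with open("openapi.yaml") as f:
    raw_spec = yaml.safe_load(f)

llm = OpenAI(temperature=0)  # assumes OPENAI_API_KEY is set in the environment
json_spec = JsonSpec(dict_=raw_spec)
toolkit = OpenAPIToolkit.from_llm(llm, json_spec, RequestsWrapper())
agent_executor = create_openapi_agent(llm=llm, toolkit=toolkit, verbose=True)
agent_executor.run("What parameters does a GET request to the /bar endpoint accept?")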
b/langchain/agents/agent_toolkits/pandas/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/pandas/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/pandas/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d3d202e487466d75314553c0b82b6505d7d3484 Binary files /dev/null and b/langchain/agents/agent_toolkits/pandas/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/pandas/base.py b/langchain/agents/agent_toolkits/pandas/base.py new file mode 100644 index 0000000000000000000000000000000000000000..43bdf90519c8730fbbd80eb862943be44f63919b --- /dev/null +++ b/langchain/agents/agent_toolkits/pandas/base.py @@ -0,0 +1,42 @@ +"""Agent for working with pandas objects.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.pandas.prompt import PREFIX, SUFFIX +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM +from langchain.tools.python.tool import PythonAstREPLTool + + +def create_pandas_dataframe_agent( + llm: BaseLLM, + df: Any, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + suffix: str = SUFFIX, + input_variables: Optional[List[str]] = None, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a pandas agent from an LLM and dataframe.""" + import pandas as pd + + if not isinstance(df, pd.DataFrame): + raise ValueError(f"Expected pandas object, got {type(df)}") + if input_variables is None: + input_variables = ["df", "input", "agent_scratchpad"] + tools = [PythonAstREPLTool(locals={"df": df})] + prompt = ZeroShotAgent.create_prompt( + tools, prefix=prefix, suffix=suffix, input_variables=input_variables + ) + partial_prompt = prompt.partial(df=str(df.head())) + llm_chain = LLMChain( + llm=llm, + prompt=partial_prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) diff --git a/langchain/agents/agent_toolkits/pandas/prompt.py b/langchain/agents/agent_toolkits/pandas/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..525d92e2320f5ad90dbdc0e1603737e16aa990c9 --- /dev/null +++ b/langchain/agents/agent_toolkits/pandas/prompt.py @@ -0,0 +1,13 @@ +# flake8: noqa + +PREFIX = """ +You are working with a pandas dataframe in Python. The name of the dataframe is `df`. +You should use the tools below to answer the question posed of you:""" + +SUFFIX = """ +This is the result of `print(df.head())`: +{df} + +Begin! 
+Question: {input} +{agent_scratchpad}""" diff --git a/langchain/agents/agent_toolkits/python/__init__.py b/langchain/agents/agent_toolkits/python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/langchain/agents/agent_toolkits/python/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/python/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8776272dff3e9a55951385b5d22ac2b1e477b234 Binary files /dev/null and b/langchain/agents/agent_toolkits/python/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/python/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/python/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e281343407e557ae64662f26735b1bdf5dc9ee5 Binary files /dev/null and b/langchain/agents/agent_toolkits/python/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/python/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/python/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0dae95b829bf58c10ffa63873482a24699532198 Binary files /dev/null and b/langchain/agents/agent_toolkits/python/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/python/base.py b/langchain/agents/agent_toolkits/python/base.py new file mode 100644 index 0000000000000000000000000000000000000000..644dd786da05555f7859fd012f555d46902f5bb7 --- /dev/null +++ b/langchain/agents/agent_toolkits/python/base.py @@ -0,0 +1,32 @@ +"""Python agent.""" + +from typing import Any, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.python.prompt import PREFIX +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM +from langchain.tools.python.tool import PythonREPLTool + + +def create_python_agent( + llm: BaseLLM, + tool: PythonREPLTool, + callback_manager: Optional[BaseCallbackManager] = None, + verbose: bool = False, + prefix: str = PREFIX, + **kwargs: Any, +) -> AgentExecutor: + """Construct a python agent from an LLM and tool.""" + tools = [tool] + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) diff --git a/langchain/agents/agent_toolkits/python/prompt.py b/langchain/agents/agent_toolkits/python/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..fc97e7916eb47e66c4fcd09f5a9384b1a2bd094c --- /dev/null +++ b/langchain/agents/agent_toolkits/python/prompt.py @@ -0,0 +1,9 @@ +# flake8: noqa + +PREFIX = """You are an agent designed to write and execute python code to answer questions. +You have access to a python REPL, which you can use to execute python code. +If you get an error, debug your code and try again. +Only use the output of your code to answer the question. +You might know the answer without running any code, but you should still run the code to get the answer. 
+If it does not seem like you can write code to answer the question, just return "I don't know" as the answer. +""" diff --git a/langchain/agents/agent_toolkits/sql/__init__.py b/langchain/agents/agent_toolkits/sql/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..74293a52391557789bd62c6885ecd91f04b89dc8 --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/__init__.py @@ -0,0 +1 @@ +"""SQL agent.""" diff --git a/langchain/agents/agent_toolkits/sql/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/sql/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8c2e332a56fd7582e63323424f286f094012232 Binary files /dev/null and b/langchain/agents/agent_toolkits/sql/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/sql/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/sql/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8602b03e9c34dd8b0d9d6e36cdcc6a7122645253 Binary files /dev/null and b/langchain/agents/agent_toolkits/sql/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/sql/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/sql/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0766b2ec9318c82e2224348f00b8d9f4fc8e32aa Binary files /dev/null and b/langchain/agents/agent_toolkits/sql/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/sql/__pycache__/simple_sql.cpython-39.pyc b/langchain/agents/agent_toolkits/sql/__pycache__/simple_sql.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ac94f0c4357c4f08ba41ce6f6a0ef6ab95e3e32 Binary files /dev/null and b/langchain/agents/agent_toolkits/sql/__pycache__/simple_sql.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/sql/__pycache__/toolkit.cpython-39.pyc b/langchain/agents/agent_toolkits/sql/__pycache__/toolkit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d9703e77d40e272f69dbfe9f6e2ae402acf768c Binary files /dev/null and b/langchain/agents/agent_toolkits/sql/__pycache__/toolkit.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/sql/base.py b/langchain/agents/agent_toolkits/sql/base.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8e4ea2135cddb5289d99e7645ecf2658cd9fde --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/base.py @@ -0,0 +1,46 @@ +"""SQL agent.""" +from typing import Any, List, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX +from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_sql_agent( + llm: BaseLLM, + toolkit: SQLDatabaseToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = SQL_PREFIX, + suffix: str = SQL_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a sql agent from an LLM and tools.""" + tools = 
toolkit.get_tools() + prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k) + prompt = ZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose + ) + diff --git a/langchain/agents/agent_toolkits/sql/prompt.py b/langchain/agents/agent_toolkits/sql/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..549cd14c8c156d434d6af19f9abdd0d1ccf70ea4 --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/prompt.py @@ -0,0 +1,170 @@ +# flake8: noqa +# flake8: noqa + +SQL_PREFIX = """You are an agent designed to interact with a SQL database. +Given an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. +Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. +You can order the results by a relevant column to return the most interesting examples in the database. +Never query for all the columns from a specific table, only ask for a the few relevant columns given the question. +You have access to tools for interacting with the database. +Only use the below tools. Only use the information returned by the below tools to construct your final answer. +You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again. + +DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. + +If the question does not seem related to the database, just return "I don't know" as the answer. + +In this case, the query is about visitors to the website, +you have access to the following tables: event, channel, page and visitor + +event table: +id: A unique identifier for each event (primary key) +action_data: Additional data related to the event +action_target: The target of the event action +event_type: An integer representing the type of event (1: page load, 2: click, 3: session end, 4: session start, 5: form submit) +timestamp: The date and time the event occurred without time zone information +channel_id: A foreign key that links to the channel table +page_id: A foreign key that links to the page table +visitor_id: A foreign key that links to the visitor table +session_id: A unique identifier for each session +duration_in_seconds: The duration of the event in seconds + +channel table: +id: A unique identifier for each channel (primary key) +source: The origin of the traffic +medium: The type of traffic (e.g. 
organic, paid) +campaign: The specific marketing campaign associated with the traffic +term: Any associated keywords or search terms + +page table: +id: A unique identifier for each page (primary key) +path: The relative URL path of the page +referrer: The referrer URL if any +search_term: The search term used to find the page, if any +title: The title of the page +url: The full URL of the page + +visitor table: +id: A unique identifier for each visitor (primary key) +company: The visitor's company, if available +country: The visitor's country +latitude: The visitor's latitude +longitude: The visitor's longitude +postal_code: The visitor's postal code +region: The visitor's region +client_id: A unique identifier for the client (linked to the event table) +created_at: The timestamp when the visitor was created +email_address: The visitor's email address, if available +fp_id: A unique fingerprint identifier for the visitor +ip: The visitor's IP address +language: The visitor's preferred language +last_updated: The timestamp of the visitor's last update +library_version: The version of the tracking library used +locale: The visitor's locale information +screen_resolution: The visitor's screen resolution +session_id: A unique identifier for the visitor's session +user_agent: The visitor's browser user agent +visitor_id_type: An integer representing the method used to generate the visitor ID (1: from session_id, 2: from fingerprint, 3: from IP address, 4: from email address, 5: from random number) +timestamp: The timestamp with time zone information + +event table schema: +column_name,data_type +id,bigint (primary key) +action_data,character varying +action_target,character varying +event_type,integer +timestamp,timestamp without time zone +channel_id,bigint (foreign key to channel table) +page_id,bigint (foreign key to page table) +visitor_id,character varying (foreign key to visitor table) +session_id,character varying +duration_in_seconds,bigint (duration of the event) + +event_type = 1 means page load +event_type = 2 means click +event_type = 3 means session end +event_type = 4 means session start +event_type = 5 means form submit + +channel table schema: +column_name,data_type +id,bigint (primary key) +source,character varying +medium,character varying +campaign,character varying +term,character varying + +page table schema: +column_name,data_type +id,bigint (primary key) +path,character varying +referrer,character varying +search_term,character varying +title,character varying +url,character varying + +visitor table schema: +id, character varying (primary key) +column_name,data_type +company,character varying +country,character varying +latitude,character varying +longitude,character varying +postal_code,character varying +region,character varying +client_id,character varying +created_at,bigint +email_address,character varying +fp_id,character varying +ip,character varying +language,character varying +last_updated,bigint +library_version,character varying +locale,character varying +screen_resolution,character varying +session_id,character varying +user_agent,character varying +visitor_id_type,integer +timestamp,timestamp with time zone + +visitor_id_type = 1 means visitor_id is generated from session_id +visitor_id_type = 2 means visitor_id is generated from fingerprint +visitor_id_type = 3 means visitor_id is generated from ip address +visitor_id_type = 4 means visitor_id is generated from email address +visitor_id_type = 5 means visitor_id is generated from random number + + +make sure 
only use correct table name and column name in the sql query + +example rows in event table: +102,"",Loading,2,1,2023-03-01 15:19:22.389000,96,47,-566777191 +103,"",,2,1,2023-03-01 15:19:25.797000,96,47,-566777191 +104,"",,2,1,2023-03-01 15:19:28.304000,96,47,-566777191 + +example rows in channel table: +4702,"","","",linkedin,linkedin,,,, +96,"","","","",direct,,,, +1002,evergreen,"","",facebook,facebook,,,, + +example rows in page table: +47,,/,"",,React App,https://robot-friends-jupiter-analytic.herokuapp.com/ +48,,/visitor-profiles/-566777191,"",,React App,https://robot-friends-jupiter-analytic.herokuapp.com/visitor-profiles/-566777191?clientId=3 +302,,/,"",,"NEON Paint Protection Film Clear Bra | Gloss, Matt, and Color PPF – Neon Paint Protection Film",https://neonprotectionfilm.com/ + +example rows in visitor table: +1658699292,Council Bluffs,,United States,41.2591,-95.8517,51502,Iowa,2,2023-03-02 20:43:42.000000,,sS5B7iqWrvyz1zxB3R88,35.239.123.81,en-US,2023-03-11 00:52:30.000000,"",,360x640,3bb0cd53-b856-449a-969d-cdba3c6f3199,"Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4388.150 Safari/537.36",1 +-396890356,Long Beach,,United States,33.7835,-118.1316,90804,California,5,2023-03-04 06:36:41.000000,,Wk74vtJjA0m9303drhTR,68.190.210.135,en-US,2023-03-04 06:38:39.000000,"",,428x926,fb2ef382-a039-4126-8c59-b914be5e55e6,"Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Mobile/15E148 Safari/604.1",1 +-490968511,Council Bluffs,,United States,41.2591,-95.8517,51502,Iowa,2,2023-03-04 21:22:14.000000,,pLRVZ6tDC83iFASTfD3c,35.224.59.8,en-US,2023-03-04 21:22:14.000000,"",,360x640,659ef7c5-0519-4d93-89a5-6ee96cc06d40,"Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4388.150 Safari/537.36",1 + + +Any question regarding visitor behaviors on the website can be answered using event table. +Any question regarding the channel that the visitor came from can be answered using event and channel table. +Any question regarding the page that the visitor visited can be answered using event and page table. +Any question regarding the demographic, location, address of the visitor can be answered using the visitor table. +""" + +SQL_SUFFIX = """Begin! + +Question: {input} +Thought: I should look at the tables in the database to see what I can query. 
+{agent_scratchpad}""" diff --git a/langchain/agents/agent_toolkits/sql/simple_sql.py b/langchain/agents/agent_toolkits/sql/simple_sql.py new file mode 100644 index 0000000000000000000000000000000000000000..996a248c48a47e1511818fca07b0ffc3f80b9c8a --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/simple_sql.py @@ -0,0 +1,45 @@ +from typing import Any, List, Optional + + +from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX +from langchain.agents.agent_toolkits.sql.toolkit import SimpleSQLDatabaseToolkit +from langchain.agents.mrkl.SQLbot import SQLAgentExecutor, SQLZeroShotAgent + +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + +def create_simple_sql_agent_excutor( + llm: BaseLLM, + toolkit: SimpleSQLDatabaseToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = SQL_PREFIX, + suffix: str = SQL_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + top_k: int = 10, + verbose: bool = False, + **kwargs: Any, +) -> SQLAgentExecutor: + """Construct a sql agent from an LLM and tools.""" + tools = toolkit.get_tools() + prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k) + prompt = SQLZeroShotAgent.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = SQLZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return SQLAgentExecutor.from_agent_and_tools( + agent=agent, tools=toolkit.get_tools(), verbose=verbose, callback_manager=callback_manager + ) diff --git a/langchain/agents/agent_toolkits/sql/toolkit.py b/langchain/agents/agent_toolkits/sql/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..1043331d897abddae3814ff79c7f3c10e17e3c90 --- /dev/null +++ b/langchain/agents/agent_toolkits/sql/toolkit.py @@ -0,0 +1,61 @@ +"""Toolkit for interacting with a SQL database.""" +from typing import List + +from pydantic import Field + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.sql_database import SQLDatabase +from langchain.tools import BaseTool +from langchain.tools.sql_database.tool import ( + InfoSQLDatabaseTool, + ListSQLDatabaseTool, + QueryCheckerTool, + QuerySQLDataBaseTool, ClarifyTool, +) + + +class SQLDatabaseToolkit(BaseToolkit): + """Toolkit for interacting with SQL databases.""" + + db: SQLDatabase = Field(exclude=True) + + @property + def dialect(self) -> str: + """Return string representation of dialect to use.""" + return self.db.dialect + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + QuerySQLDataBaseTool(db=self.db), + InfoSQLDatabaseTool(db=self.db), + ListSQLDatabaseTool(db=self.db), + QueryCheckerTool(db=self.db), + ] + + +class SimpleSQLDatabaseToolkit(BaseToolkit): + """Toolkit for interacting with SQL databases.""" + + db: SQLDatabase = Field(exclude=True) + + @property + def dialect(self) -> str: + """Return string representation of dialect to use.""" + return self.db.dialect + + class Config: + 
"""Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + QuerySQLDataBaseTool(db=self.db), + ] diff --git a/langchain/agents/agent_toolkits/vectorstore/__init__.py b/langchain/agents/agent_toolkits/vectorstore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ee15a97e4898341a0d1fb68d56071759f07546c6 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/__init__.py @@ -0,0 +1 @@ +"""Agent toolkit for interacting with vector stores.""" diff --git a/langchain/agents/agent_toolkits/vectorstore/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/vectorstore/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f718645842a8a5927c1e2018d0fea1bc67f12bc9 Binary files /dev/null and b/langchain/agents/agent_toolkits/vectorstore/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/vectorstore/__pycache__/base.cpython-39.pyc b/langchain/agents/agent_toolkits/vectorstore/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fb79bd52b4fd21a35758de7c7c7d0c9092a6914 Binary files /dev/null and b/langchain/agents/agent_toolkits/vectorstore/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/vectorstore/__pycache__/prompt.cpython-39.pyc b/langchain/agents/agent_toolkits/vectorstore/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdcda397222d260b3bd6964be3147b64e9b9ce33 Binary files /dev/null and b/langchain/agents/agent_toolkits/vectorstore/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/vectorstore/__pycache__/toolkit.cpython-39.pyc b/langchain/agents/agent_toolkits/vectorstore/__pycache__/toolkit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb2cd44efa5534ae6e130c6fbfab374172c489be Binary files /dev/null and b/langchain/agents/agent_toolkits/vectorstore/__pycache__/toolkit.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/vectorstore/base.py b/langchain/agents/agent_toolkits/vectorstore/base.py new file mode 100644 index 0000000000000000000000000000000000000000..3b31fab8b57923ae796d6a013015b2227940150b --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/base.py @@ -0,0 +1,55 @@ +"""VectorStore agent.""" +from typing import Any, Optional + +from langchain.agents.agent import AgentExecutor +from langchain.agents.agent_toolkits.vectorstore.prompt import PREFIX, ROUTER_PREFIX +from langchain.agents.agent_toolkits.vectorstore.toolkit import ( + VectorStoreRouterToolkit, + VectorStoreToolkit, +) +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM + + +def create_vectorstore_agent( + llm: BaseLLM, + toolkit: VectorStoreToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a vectorstore agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = 
ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) + + +def create_vectorstore_router_agent( + llm: BaseLLM, + toolkit: VectorStoreRouterToolkit, + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = ROUTER_PREFIX, + verbose: bool = False, + **kwargs: Any, +) -> AgentExecutor: + """Construct a vectorstore router agent from an LLM and tools.""" + tools = toolkit.get_tools() + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) diff --git a/langchain/agents/agent_toolkits/vectorstore/prompt.py b/langchain/agents/agent_toolkits/vectorstore/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..a2837e56f1858a39beacc6ac2d43d40a84c70ee2 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/prompt.py @@ -0,0 +1,13 @@ +# flake8: noqa + +PREFIX = """You are an agent designed to answer questions about sets of documents. +You have access to tools for interacting with the documents, and the inputs to the tools are questions. +Sometimes, you will be asked to provide sources for your answers, in which case you should use the appropriate tool to do so. +If the question does not seem relevant to any of the tools provided, just return "I don't know" as the answer. +""" + +ROUTER_PREFIX = """You are an agent designed to answer questions. +You have access to tools for interacting with different sources, and the inputs to the tools are questions. +Your main task is to decide which of the tools is relevant for answering the question at hand. +For complex questions, you can break the question down into sub questions and use tools to answer the sub questions.
+""" diff --git a/langchain/agents/agent_toolkits/vectorstore/toolkit.py b/langchain/agents/agent_toolkits/vectorstore/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..72ad126236167e8038dd81851d3a6a71e55152f8 --- /dev/null +++ b/langchain/agents/agent_toolkits/vectorstore/toolkit.py @@ -0,0 +1,89 @@ +"""Toolkit for interacting with a vector store.""" +from typing import List + +from pydantic import BaseModel, Field + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI +from langchain.tools import BaseTool +from langchain.tools.vectorstore.tool import ( + VectorStoreQATool, + VectorStoreQAWithSourcesTool, +) +from langchain.vectorstores.base import VectorStore + + +class VectorStoreInfo(BaseModel): + """Information about a vectorstore.""" + + vectorstore: VectorStore = Field(exclude=True) + name: str + description: str + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + +class VectorStoreToolkit(BaseToolkit): + """Toolkit for interacting with a vector store.""" + + vectorstore_info: VectorStoreInfo = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + description = VectorStoreQATool.get_description( + self.vectorstore_info.name, self.vectorstore_info.description + ) + qa_tool = VectorStoreQATool( + name=self.vectorstore_info.name, + description=description, + vectorstore=self.vectorstore_info.vectorstore, + llm=self.llm, + ) + description = VectorStoreQAWithSourcesTool.get_description( + self.vectorstore_info.name, self.vectorstore_info.description + ) + qa_with_sources_tool = VectorStoreQAWithSourcesTool( + name=f"{self.vectorstore_info.name}_with_sources", + description=description, + vectorstore=self.vectorstore_info.vectorstore, + llm=self.llm, + ) + return [qa_tool, qa_with_sources_tool] + + +class VectorStoreRouterToolkit(BaseToolkit): + """Toolkit for routing between vectorstores.""" + + vectorstores: List[VectorStoreInfo] = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + tools: List[BaseTool] = [] + for vectorstore_info in self.vectorstores: + description = VectorStoreQATool.get_description( + vectorstore_info.name, vectorstore_info.description + ) + qa_tool = VectorStoreQATool( + name=vectorstore_info.name, + description=description, + vectorstore=vectorstore_info.vectorstore, + llm=self.llm, + ) + tools.append(qa_tool) + return tools diff --git a/langchain/agents/agent_toolkits/zapier/__init__.py b/langchain/agents/agent_toolkits/zapier/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..faef4a3253167aa6767779fbd32292cedcfc4a4e --- /dev/null +++ b/langchain/agents/agent_toolkits/zapier/__init__.py @@ -0,0 +1 @@ +"""Zapier Toolkit.""" diff --git a/langchain/agents/agent_toolkits/zapier/__pycache__/__init__.cpython-39.pyc b/langchain/agents/agent_toolkits/zapier/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9479d86013fab2dea67b1c3f8cddfb4897b2a734 Binary files /dev/null and 
b/langchain/agents/agent_toolkits/zapier/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/zapier/__pycache__/toolkit.cpython-39.pyc b/langchain/agents/agent_toolkits/zapier/__pycache__/toolkit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..469a8bb785b1ddfd56bf1650f7293f903fe4c4fd Binary files /dev/null and b/langchain/agents/agent_toolkits/zapier/__pycache__/toolkit.cpython-39.pyc differ diff --git a/langchain/agents/agent_toolkits/zapier/toolkit.py b/langchain/agents/agent_toolkits/zapier/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..47e27ce7141298945677073526afb14c5c61f9d0 --- /dev/null +++ b/langchain/agents/agent_toolkits/zapier/toolkit.py @@ -0,0 +1,34 @@ +"""Zapier Toolkit.""" +from typing import List + +from langchain.agents.agent_toolkits.base import BaseToolkit +from langchain.tools import BaseTool +from langchain.tools.zapier.tool import ZapierNLARunAction +from langchain.utilities.zapier import ZapierNLAWrapper + + +class ZapierToolkit(BaseToolkit): + """Zapier Toolkit.""" + + tools: List[BaseTool] = [] + + @classmethod + def from_zapier_nla_wrapper( + cls, zapier_nla_wrapper: ZapierNLAWrapper + ) -> "ZapierToolkit": + """Create a toolkit from a ZapierNLAWrapper.""" + actions = zapier_nla_wrapper.list() + tools = [ + ZapierNLARunAction( + action_id=action["id"], + zapier_description=action["description"], + params_schema=action["params"], + api_wrapper=zapier_nla_wrapper, + ) + for action in actions + ] + return cls(tools=tools) + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return self.tools diff --git a/langchain/agents/chat/__init__.py b/langchain/agents/chat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/langchain/agents/chat/__pycache__/__init__.cpython-39.pyc b/langchain/agents/chat/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb550cc96de64e828388f06740fee4c389b9b808 Binary files /dev/null and b/langchain/agents/chat/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/chat/__pycache__/base.cpython-39.pyc b/langchain/agents/chat/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c156f7cdcf0d0fd34e3d07e0647a4438399366a2 Binary files /dev/null and b/langchain/agents/chat/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/chat/__pycache__/prompt.cpython-39.pyc b/langchain/agents/chat/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..223577ad9c405701c0d995e595310bd0019582b1 Binary files /dev/null and b/langchain/agents/chat/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/chat/base.py b/langchain/agents/chat/base.py new file mode 100644 index 0000000000000000000000000000000000000000..1f843d839cd8e939910763011dfa4fe5d618b340 --- /dev/null +++ b/langchain/agents/chat/base.py @@ -0,0 +1,113 @@ +import json +from typing import Any, List, Optional, Sequence, Tuple + +from langchain.agents.agent import Agent +from langchain.agents.chat.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + 
SystemMessagePromptTemplate, +) +from langchain.schema import AgentAction, BaseLanguageModel +from langchain.tools import BaseTool + +FINAL_ANSWER_ACTION = "Final Answer:" + + +class ChatAgent(Agent): + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return "Observation: " + + @property + def llm_prefix(self) -> str: + """Prefix to append the llm call with.""" + return "Thought:" + + def _construct_scratchpad( + self, intermediate_steps: List[Tuple[AgentAction, str]] + ) -> str: + agent_scratchpad = super()._construct_scratchpad(intermediate_steps) + if not isinstance(agent_scratchpad, str): + raise ValueError("agent_scratchpad should be of type string.") + if agent_scratchpad: + return ( + f"This was your previous work " + f"(but I haven't seen any of it! I only see what " + f"you return as final answer):\n{agent_scratchpad}" + ) + else: + return agent_scratchpad + + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: + if FINAL_ANSWER_ACTION in text: + return "Final Answer", text.split(FINAL_ANSWER_ACTION)[-1].strip() + try: + _, action, _ = text.split("```") + response = json.loads(action.strip()) + return response["action"], response["action_input"] + + except Exception: + raise ValueError(f"Could not parse LLM output: {text}") + + @property + def _stop(self) -> List[str]: + return ["Observation:"] + + @classmethod + def create_prompt( + cls, + tools: Sequence[BaseTool], + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + ) -> BasePromptTemplate: + tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools]) + tool_names = ", ".join([tool.name for tool in tools]) + format_instructions = format_instructions.format(tool_names=tool_names) + template = "\n\n".join([prefix, tool_strings, format_instructions, suffix]) + messages = [ + SystemMessagePromptTemplate.from_template(template), + HumanMessagePromptTemplate.from_template("{input}\n\n{agent_scratchpad}"), + ] + if input_variables is None: + input_variables = ["input", "agent_scratchpad"] + return ChatPromptTemplate(input_variables=input_variables, messages=messages) + + @classmethod + def from_llm_and_tools( + cls, + llm: BaseLanguageModel, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + **kwargs: Any, + ) -> Agent: + """Construct an agent from an LLM and tools.""" + cls._validate_tools(tools) + prompt = cls.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + + @property + def _agent_type(self) -> str: + raise ValueError diff --git a/langchain/agents/chat/prompt.py b/langchain/agents/chat/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3625e20222c5e6c75ad25f1c4a3cd4d8159e7c61 --- /dev/null +++ b/langchain/agents/chat/prompt.py @@ -0,0 +1,29 @@ +# flake8: noqa +PREFIX = """Answer the following questions as best you can. You have access to the following tools:""" +FORMAT_INSTRUCTIONS = """The way you use the tools is by specifying a json blob. 
+Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here). + +The only values that should be in the "action" field are: {tool_names} + +The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB: + +``` +{{{{ + "action": $TOOL_NAME, + "action_input": $INPUT +}}}} +``` + +ALWAYS use the following format: + +Question: the input question you must answer +Thought: you should always think about what to do +Action: +``` +$JSON_BLOB +``` +Observation: the result of the action +... (this Thought/Action/Observation can repeat N times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question""" +SUFFIX = """Begin! Reminder to always use the exact characters `Final Answer` when responding.""" diff --git a/langchain/agents/conversational/__init__.py b/langchain/agents/conversational/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..94290c9cb9c706d29a874c229aa087a87fb9fe68 --- /dev/null +++ b/langchain/agents/conversational/__init__.py @@ -0,0 +1 @@ +"""An agent designed to hold a conversation in addition to using tools.""" diff --git a/langchain/agents/conversational/__pycache__/__init__.cpython-39.pyc b/langchain/agents/conversational/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..977502735ae9e3c3597a03f7e0c4d9d08ba82726 Binary files /dev/null and b/langchain/agents/conversational/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/conversational/__pycache__/base.cpython-39.pyc b/langchain/agents/conversational/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bacaf947f8bbb1de2b034297df1bae16574dd29 Binary files /dev/null and b/langchain/agents/conversational/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/conversational/__pycache__/prompt.cpython-39.pyc b/langchain/agents/conversational/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e8654211f269261713ccc910f30e0dac89c00cc Binary files /dev/null and b/langchain/agents/conversational/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/conversational/base.py b/langchain/agents/conversational/base.py new file mode 100644 index 0000000000000000000000000000000000000000..e47329d2edeec6f6f0322cabb916becd345cec5b --- /dev/null +++ b/langchain/agents/conversational/base.py @@ -0,0 +1,122 @@ +"""An agent designed to hold a conversation in addition to using tools.""" +from __future__ import annotations + +import re +from typing import Any, List, Optional, Sequence, Tuple + +from langchain.agents.agent import Agent +from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains import LLMChain +from langchain.llms import BaseLLM +from langchain.prompts import PromptTemplate +from langchain.tools.base import BaseTool + + +class ConversationalAgent(Agent): + """An agent designed to hold a conversation in addition to using tools.""" + + ai_prefix: str = "AI" + + @property + def _agent_type(self) -> str: + """Return Identifier of agent type.""" + return "conversational-react-description" + + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return 
"Observation: " + + @property + def llm_prefix(self) -> str: + """Prefix to append the llm call with.""" + return "Thought:" + + @classmethod + def create_prompt( + cls, + tools: Sequence[BaseTool], + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + ai_prefix: str = "AI", + human_prefix: str = "Human", + input_variables: Optional[List[str]] = None, + ) -> PromptTemplate: + """Create prompt in the style of the zero shot agent. + + Args: + tools: List of tools the agent will have access to, used to format the + prompt. + prefix: String to put before the list of tools. + suffix: String to put after the list of tools. + ai_prefix: String to use before AI output. + human_prefix: String to use before human output. + input_variables: List of input variables the final prompt will expect. + + Returns: + A PromptTemplate with the template assembled from the pieces here. + """ + tool_strings = "\n".join( + [f"> {tool.name}: {tool.description}" for tool in tools] + ) + tool_names = ", ".join([tool.name for tool in tools]) + format_instructions = format_instructions.format( + tool_names=tool_names, ai_prefix=ai_prefix, human_prefix=human_prefix + ) + template = "\n\n".join([prefix, tool_strings, format_instructions, suffix]) + if input_variables is None: + input_variables = ["input", "chat_history", "agent_scratchpad"] + return PromptTemplate(template=template, input_variables=input_variables) + + @property + def finish_tool_name(self) -> str: + """Name of the tool to use to finish the chain.""" + return self.ai_prefix + + def _extract_tool_and_input(self, llm_output: str) -> Optional[Tuple[str, str]]: + if f"{self.ai_prefix}:" in llm_output: + return self.ai_prefix, llm_output.split(f"{self.ai_prefix}:")[-1].strip() + regex = r"Action: (.*?)[\n]*Action Input: (.*)" + match = re.search(regex, llm_output) + if not match: + raise ValueError(f"Could not parse LLM output: `{llm_output}`") + action = match.group(1) + action_input = match.group(2) + return action.strip(), action_input.strip(" ").strip('"') + + @classmethod + def from_llm_and_tools( + cls, + llm: BaseLLM, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + ai_prefix: str = "AI", + human_prefix: str = "Human", + input_variables: Optional[List[str]] = None, + **kwargs: Any, + ) -> Agent: + """Construct an agent from an LLM and tools.""" + cls._validate_tools(tools) + prompt = cls.create_prompt( + tools, + ai_prefix=ai_prefix, + human_prefix=human_prefix, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + return cls( + llm_chain=llm_chain, allowed_tools=tool_names, ai_prefix=ai_prefix, **kwargs + ) diff --git a/langchain/agents/conversational/prompt.py b/langchain/agents/conversational/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..15268a760834452eb3ff990ca6548a80788271d3 --- /dev/null +++ b/langchain/agents/conversational/prompt.py @@ -0,0 +1,36 @@ +# flake8: noqa +PREFIX = """Assistant is a large language model trained by OpenAI. + +Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. 
As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. + +Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics. + +Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist. + +TOOLS: +------ + +Assistant has access to the following tools:""" +FORMAT_INSTRUCTIONS = """To use a tool, please use the following format: + +``` +Thought: Do I need to use a tool? Yes +Action: the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action +``` + +When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format: + +``` +Thought: Do I need to use a tool? No +{ai_prefix}: [your response here] +```""" + +SUFFIX = """Begin! + +Previous conversation history: +{chat_history} + +New input: {input} +{agent_scratchpad}""" diff --git a/langchain/agents/conversational_chat/__init__.py b/langchain/agents/conversational_chat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..94290c9cb9c706d29a874c229aa087a87fb9fe68 --- /dev/null +++ b/langchain/agents/conversational_chat/__init__.py @@ -0,0 +1 @@ +"""An agent designed to hold a conversation in addition to using tools.""" diff --git a/langchain/agents/conversational_chat/__pycache__/__init__.cpython-39.pyc b/langchain/agents/conversational_chat/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33800126ce406aadfe587e8bf51c95ab5348edf0 Binary files /dev/null and b/langchain/agents/conversational_chat/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/conversational_chat/__pycache__/base.cpython-39.pyc b/langchain/agents/conversational_chat/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0afea758ec8e2a47bbfd5bf2ae68047f2c4668e4 Binary files /dev/null and b/langchain/agents/conversational_chat/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/conversational_chat/__pycache__/prompt.cpython-39.pyc b/langchain/agents/conversational_chat/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..447235b9225376cf76d362cdcccc1c4e8d13d318 Binary files /dev/null and b/langchain/agents/conversational_chat/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/conversational_chat/base.py b/langchain/agents/conversational_chat/base.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1c8b7fa28103bbae77e48edfd77b957214abc6 --- /dev/null +++ b/langchain/agents/conversational_chat/base.py @@ -0,0 +1,157 @@ +"""An agent designed to hold a conversation in addition to using tools.""" +from __future__ import annotations + +import 
json +from typing import Any, List, Optional, Sequence, Tuple + +from langchain.agents.agent import Agent +from langchain.agents.conversational_chat.prompt import ( + FORMAT_INSTRUCTIONS, + PREFIX, + SUFFIX, + TEMPLATE_TOOL_RESPONSE, +) +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains import LLMChain +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + MessagesPlaceholder, + SystemMessagePromptTemplate, +) +from langchain.schema import ( + AgentAction, + AIMessage, + BaseLanguageModel, + BaseMessage, + BaseOutputParser, + HumanMessage, +) +from langchain.tools.base import BaseTool + + +class AgentOutputParser(BaseOutputParser): + def get_format_instructions(self) -> str: + return FORMAT_INSTRUCTIONS + + def parse(self, text: str) -> Any: + cleaned_output = text.strip() + if "```json" in cleaned_output: + _, cleaned_output = cleaned_output.split("```json") + if "```" in cleaned_output: + cleaned_output, _ = cleaned_output.split("```") + if cleaned_output.startswith("```json"): + cleaned_output = cleaned_output[len("```json") :] + if cleaned_output.startswith("```"): + cleaned_output = cleaned_output[len("```") :] + if cleaned_output.endswith("```"): + cleaned_output = cleaned_output[: -len("```")] + cleaned_output = cleaned_output.strip() + response = json.loads(cleaned_output) + return {"action": response["action"], "action_input": response["action_input"]} + + +class ConversationalChatAgent(Agent): + """An agent designed to hold a conversation in addition to using tools.""" + + output_parser: BaseOutputParser + + @property + def _agent_type(self) -> str: + raise NotImplementedError + + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return "Observation: " + + @property + def llm_prefix(self) -> str: + """Prefix to append the llm call with.""" + return "Thought:" + + @classmethod + def create_prompt( + cls, + tools: Sequence[BaseTool], + system_message: str = PREFIX, + human_message: str = SUFFIX, + input_variables: Optional[List[str]] = None, + output_parser: Optional[BaseOutputParser] = None, + ) -> BasePromptTemplate: + tool_strings = "\n".join( + [f"> {tool.name}: {tool.description}" for tool in tools] + ) + tool_names = ", ".join([tool.name for tool in tools]) + _output_parser = output_parser or AgentOutputParser() + format_instructions = human_message.format( + format_instructions=_output_parser.get_format_instructions() + ) + final_prompt = format_instructions.format( + tool_names=tool_names, tools=tool_strings + ) + if input_variables is None: + input_variables = ["input", "chat_history", "agent_scratchpad"] + messages = [ + SystemMessagePromptTemplate.from_template(system_message), + MessagesPlaceholder(variable_name="chat_history"), + HumanMessagePromptTemplate.from_template(final_prompt), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ] + return ChatPromptTemplate(input_variables=input_variables, messages=messages) + + def _extract_tool_and_input(self, llm_output: str) -> Optional[Tuple[str, str]]: + try: + response = self.output_parser.parse(llm_output) + return response["action"], response["action_input"] + except Exception: + raise ValueError(f"Could not parse LLM output: {llm_output}") + + def _construct_scratchpad( + self, intermediate_steps: List[Tuple[AgentAction, str]] + ) -> List[BaseMessage]: + """Construct the scratchpad that lets the agent continue its thought process.""" + 
thoughts: List[BaseMessage] = [] + for action, observation in intermediate_steps: + thoughts.append(AIMessage(content=action.log)) + human_message = HumanMessage( + content=TEMPLATE_TOOL_RESPONSE.format(observation=observation) + ) + thoughts.append(human_message) + return thoughts + + @classmethod + def from_llm_and_tools( + cls, + llm: BaseLanguageModel, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + system_message: str = PREFIX, + human_message: str = SUFFIX, + input_variables: Optional[List[str]] = None, + output_parser: Optional[BaseOutputParser] = None, + **kwargs: Any, + ) -> Agent: + """Construct an agent from an LLM and tools.""" + cls._validate_tools(tools) + _output_parser = output_parser or AgentOutputParser() + prompt = cls.create_prompt( + tools, + system_message=system_message, + human_message=human_message, + input_variables=input_variables, + output_parser=_output_parser, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + return cls( + llm_chain=llm_chain, + allowed_tools=tool_names, + output_parser=_output_parser, + **kwargs, + ) diff --git a/langchain/agents/conversational_chat/prompt.py b/langchain/agents/conversational_chat/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..8c4737bbcf6ba7e67ebbba6dae78f082f27410cc --- /dev/null +++ b/langchain/agents/conversational_chat/prompt.py @@ -0,0 +1,57 @@ +# flake8: noqa +PREFIX = """Assistant is a large language model trained by OpenAI. + +Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. + +Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics. + +Overall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.""" + +FORMAT_INSTRUCTIONS = """RESPONSE FORMAT INSTRUCTIONS +---------------------------- + +When responding to me please, please output a response in one of two formats: + +**Option 1:** +Use this if you want the human to use a tool. +Markdown code snippet formatted in the following schema: + +```json +{{{{ + "action": string \\ The action to take. Must be one of {tool_names} + "action_input": string \\ The input to the action +}}}} +``` + +**Option #2:** +Use this if you want to respond directly to the human. 
Markdown code snippet formatted in the following schema: + +```json +{{{{ + "action": "Final Answer", + "action_input": string \\ You should put what you want to return to use here +}}}} +```""" + +SUFFIX = """TOOLS +------ +Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are: + +{{tools}} + +{format_instructions} + +USER'S INPUT +-------------------- +Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else): + +{{{{input}}}}""" + +TEMPLATE_TOOL_RESPONSE = """TOOL RESPONSE: +--------------------- +{observation} + +USER'S INPUT +-------------------- + +Okay, so what is the response to my original question? If using information from tools, you must say it explicitly - I have forgotten all TOOL RESPONSES! Remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else.""" diff --git a/langchain/agents/initialize.py b/langchain/agents/initialize.py new file mode 100644 index 0000000000000000000000000000000000000000..825f27d884578f2bfa4b2d3fd3fff461743994b1 --- /dev/null +++ b/langchain/agents/initialize.py @@ -0,0 +1,75 @@ +"""Load agent.""" +from typing import Any, Optional, Sequence + +from langchain.agents.agent import AgentExecutor +from langchain.agents.loading import AGENT_TO_CLASS, load_agent +from langchain.callbacks.base import BaseCallbackManager +from langchain.llms.base import BaseLLM +from langchain.tools.base import BaseTool + + +def initialize_agent( + tools: Sequence[BaseTool], + llm: BaseLLM, + agent: Optional[str] = None, + callback_manager: Optional[BaseCallbackManager] = None, + agent_path: Optional[str] = None, + agent_kwargs: Optional[dict] = None, + **kwargs: Any, +) -> AgentExecutor: + """Load an agent executor given tools and LLM. + + Args: + tools: List of tools this agent has access to. + llm: Language model to use as the agent. + agent: A string that specified the agent type to use. Valid options are: + `zero-shot-react-description` + `react-docstore` + `self-ask-with-search` + `conversational-react-description` + `chat-zero-shot-react-description`, + `chat-conversational-react-description`, + If None and agent_path is also None, will default to + `zero-shot-react-description`. + callback_manager: CallbackManager to use. Global callback manager is used if + not provided. Defaults to None. + agent_path: Path to serialized agent to use. + agent_kwargs: Additional key word arguments to pass to the underlying agent + **kwargs: Additional key word arguments passed to the agent executor + + Returns: + An agent executor + """ + if agent is None and agent_path is None: + agent = "zero-shot-react-description" + if agent is not None and agent_path is not None: + raise ValueError( + "Both `agent` and `agent_path` are specified, " + "but at most only one should be." + ) + if agent is not None: + if agent not in AGENT_TO_CLASS: + raise ValueError( + f"Got unknown agent type: {agent}. " + f"Valid types are: {AGENT_TO_CLASS.keys()}." + ) + agent_cls = AGENT_TO_CLASS[agent] + agent_kwargs = agent_kwargs or {} + agent_obj = agent_cls.from_llm_and_tools( + llm, tools, callback_manager=callback_manager, **agent_kwargs + ) + elif agent_path is not None: + agent_obj = load_agent( + agent_path, llm=llm, tools=tools, callback_manager=callback_manager + ) + else: + raise ValueError( + "Somehow both `agent` and `agent_path` are None, " + "this should never happen." 
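The doubled and quadrupled braces in the suffix above exist because the text passes through two rounds of `str.format` inside `create_prompt` before becoming the final template. A small standalone sketch of that escaping (the suffix string here is a simplified stand-in):

```python
# Sketch of the two-stage formatting used by create_prompt().
# Each pass of str.format() halves the braces, so "{{{{input}}}}" survives
# both passes as the literal "{input}" placeholder of the final template.
suffix = "TOOLS\n{{tools}}\n\n{format_instructions}\n\nUSER'S INPUT\n{{{{input}}}}"

# Pass 1: human_message.format(format_instructions=...)
stage_one = suffix.format(format_instructions="(format instructions go here)")
assert "{tools}" in stage_one and "{{input}}" in stage_one

# Pass 2: .format(tool_names=..., tools=...)
final_prompt = stage_one.format(tools="> Search: ...", tool_names="Search")
assert "{input}" in final_prompt  # left for the HumanMessagePromptTemplate
```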
+ ) + return AgentExecutor.from_agent_and_tools( + agent=agent_obj, + tools=tools, + callback_manager=callback_manager, + **kwargs, + ) diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..1ca5a86e8500c878ec0870da8fbfc2ea6f2d4e31 --- /dev/null +++ b/langchain/agents/load_tools.py @@ -0,0 +1,268 @@ +# flake8: noqa +"""Load tools.""" +from typing import Any, List, Optional + +from langchain.agents.tools import Tool +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.api import news_docs, open_meteo_docs, tmdb_docs, podcast_docs +from langchain.chains.api.base import APIChain +from langchain.chains.llm_math.base import LLMMathChain +from langchain.chains.pal.base import PALChain +from langchain.llms.base import BaseLLM +from langchain.requests import RequestsWrapper +from langchain.tools.base import BaseTool +from langchain.tools.bing_search.tool import BingSearchRun +from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun +from langchain.tools.human.tool import HumanInputRun +from langchain.tools.python.tool import PythonREPLTool +from langchain.tools.requests.tool import RequestsGetTool +from langchain.tools.wikipedia.tool import WikipediaQueryRun +from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun +from langchain.utilities.bash import BashProcess +from langchain.utilities.bing_search import BingSearchAPIWrapper +from langchain.utilities.google_search import GoogleSearchAPIWrapper +from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.searx_search import SearxSearchWrapper +from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper +from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper + + +def _get_python_repl() -> BaseTool: + return PythonREPLTool() + + +def _get_requests() -> BaseTool: + return RequestsGetTool(requests_wrapper=RequestsWrapper()) + + +def _get_terminal() -> BaseTool: + return Tool( + name="Terminal", + description="Executes commands in a terminal. Input should be valid commands, and the output will be any output from running that command.", + func=BashProcess().run, + ) + + +_BASE_TOOLS = { + "python_repl": _get_python_repl, + "requests": _get_requests, + "terminal": _get_terminal, +} + + +def _get_pal_math(llm: BaseLLM) -> BaseTool: + return Tool( + name="PAL-MATH", + description="A language model that is really good at solving complex word math problems. Input should be a fully worded hard word math problem.", + func=PALChain.from_math_prompt(llm).run, + ) + + +def _get_pal_colored_objects(llm: BaseLLM) -> BaseTool: + return Tool( + name="PAL-COLOR-OBJ", + description="A language model that is really good at reasoning about position and the color attributes of objects. Input should be a fully worded hard reasoning problem. 
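A minimal sketch of how `initialize_agent` above is meant to be called. The `OpenAI` wrapper is assumed from upstream LangChain and an `OPENAI_API_KEY` is assumed to be configured.

```python
# Sketch: building an AgentExecutor with initialize_agent().
from langchain.agents.initialize import initialize_agent
from langchain.agents.load_tools import load_tools
from langchain.llms import OpenAI  # assumed upstream LLM wrapper

llm = OpenAI(temperature=0)
tools = load_tools(["llm-math"], llm=llm)  # "Calculator" tool backed by LLMMathChain

# Defaults to "zero-shot-react-description" when neither agent nor agent_path is given.
executor = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
executor.run("What is 7 raised to the 0.23 power?")
```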
Make sure to include all information about the objects AND the final question you want to answer.", + func=PALChain.from_colored_object_prompt(llm).run, + ) + + +def _get_llm_math(llm: BaseLLM) -> BaseTool: + return Tool( + name="Calculator", + description="Useful for when you need to answer questions about math.", + func=LLMMathChain(llm=llm, callback_manager=llm.callback_manager).run, + coroutine=LLMMathChain(llm=llm, callback_manager=llm.callback_manager).arun, + ) + + +def _get_open_meteo_api(llm: BaseLLM) -> BaseTool: + chain = APIChain.from_llm_and_api_docs(llm, open_meteo_docs.OPEN_METEO_DOCS) + return Tool( + name="Open Meteo API", + description="Useful for when you want to get weather information from the OpenMeteo API. The input should be a question in natural language that this API can answer.", + func=chain.run, + ) + + +_LLM_TOOLS = { + "pal-math": _get_pal_math, + "pal-colored-objects": _get_pal_colored_objects, + "llm-math": _get_llm_math, + "open-meteo-api": _get_open_meteo_api, +} + + +def _get_news_api(llm: BaseLLM, **kwargs: Any) -> BaseTool: + news_api_key = kwargs["news_api_key"] + chain = APIChain.from_llm_and_api_docs( + llm, news_docs.NEWS_DOCS, headers={"X-Api-Key": news_api_key} + ) + return Tool( + name="News API", + description="Use this when you want to get information about the top headlines of current news stories. The input should be a question in natural language that this API can answer.", + func=chain.run, + ) + + +def _get_tmdb_api(llm: BaseLLM, **kwargs: Any) -> BaseTool: + tmdb_bearer_token = kwargs["tmdb_bearer_token"] + chain = APIChain.from_llm_and_api_docs( + llm, + tmdb_docs.TMDB_DOCS, + headers={"Authorization": f"Bearer {tmdb_bearer_token}"}, + ) + return Tool( + name="TMDB API", + description="Useful for when you want to get information from The Movie Database. The input should be a question in natural language that this API can answer.", + func=chain.run, + ) + + +def _get_podcast_api(llm: BaseLLM, **kwargs: Any) -> BaseTool: + listen_api_key = kwargs["listen_api_key"] + chain = APIChain.from_llm_and_api_docs( + llm, + podcast_docs.PODCAST_DOCS, + headers={"X-ListenAPI-Key": listen_api_key}, + ) + return Tool( + name="Podcast API", + description="Use the Listen Notes Podcast API to search all podcasts or episodes. The input should be a question in natural language that this API can answer.", + func=chain.run, + ) + + +def _get_wolfram_alpha(**kwargs: Any) -> BaseTool: + return WolframAlphaQueryRun(api_wrapper=WolframAlphaAPIWrapper(**kwargs)) + + +def _get_google_search(**kwargs: Any) -> BaseTool: + return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs)) + + +def _get_wikipedia(**kwargs: Any) -> BaseTool: + return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs)) + + +def _get_google_serper(**kwargs: Any) -> BaseTool: + return Tool( + name="Serper Search", + func=GoogleSerperAPIWrapper(**kwargs).run, + description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.", + ) + + +def _get_google_search_results_json(**kwargs: Any) -> BaseTool: + return GoogleSearchResults(api_wrapper=GoogleSearchAPIWrapper(**kwargs)) + + +def _get_serpapi(**kwargs: Any) -> BaseTool: + return Tool( + name="Search", + description="A search engine. Useful for when you need to answer questions about current events. 
Input should be a search query.", + func=SerpAPIWrapper(**kwargs).run, + coroutine=SerpAPIWrapper(**kwargs).arun, + ) + + +def _get_searx_search(**kwargs: Any) -> BaseTool: + return Tool( + name="SearX Search", + description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.", + func=SearxSearchWrapper(**kwargs).run, + ) + + +def _get_bing_search(**kwargs: Any) -> BaseTool: + return BingSearchRun(api_wrapper=BingSearchAPIWrapper(**kwargs)) + + +def _get_human_tool(**kwargs: Any) -> BaseTool: + return HumanInputRun(**kwargs) + + +_EXTRA_LLM_TOOLS = { + "news-api": (_get_news_api, ["news_api_key"]), + "tmdb-api": (_get_tmdb_api, ["tmdb_bearer_token"]), + "podcast-api": (_get_podcast_api, ["listen_api_key"]), +} + +_EXTRA_OPTIONAL_TOOLS = { + "wolfram-alpha": (_get_wolfram_alpha, ["wolfram_alpha_appid"]), + "google-search": (_get_google_search, ["google_api_key", "google_cse_id"]), + "google-search-results-json": ( + _get_google_search_results_json, + ["google_api_key", "google_cse_id", "num_results"], + ), + "bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]), + "google-serper": (_get_google_serper, ["serper_api_key"]), + "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), + "searx-search": (_get_searx_search, ["searx_host"]), + "wikipedia": (_get_wikipedia, ["top_k_results"]), + "human": (_get_human_tool, ["prompt_func", "input_func"]), +} + + +def load_tools( + tool_names: List[str], + llm: Optional[BaseLLM] = None, + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> List[BaseTool]: + """Load tools based on their name. + + Args: + tool_names: name of tools to load. + llm: Optional language model, may be needed to initialize certain tools. + callback_manager: Optional callback manager. If not provided, default global callback manager will be used. + + Returns: + List of tools. 
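As a usage note on the registries above: the string lists in `_EXTRA_LLM_TOOLS` and `_EXTRA_OPTIONAL_TOOLS` name the extra kwargs a tool needs, and `load_tools` forwards them. A hedged sketch (the API key value is a placeholder):

```python
# Sketch: loading tools that need extra kwargs, per the registries above.
from langchain.agents.load_tools import get_all_tool_names, load_tools
from langchain.llms import OpenAI  # assumed upstream LLM wrapper

print(get_all_tool_names())  # every name load_tools() will accept

llm = OpenAI(temperature=0)
tools = load_tools(
    ["python_repl", "news-api"],
    llm=llm,                          # required because "news-api" is an _EXTRA_LLM_TOOLS entry
    news_api_key="YOUR_NEWSAPI_KEY",  # placeholder; listed as a required key for "news-api"
)
print([t.name for t in tools])
```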
+ """ + tools = [] + for name in tool_names: + if name in _BASE_TOOLS: + tools.append(_BASE_TOOLS[name]()) + elif name in _LLM_TOOLS: + if llm is None: + raise ValueError(f"Tool {name} requires an LLM to be provided") + tool = _LLM_TOOLS[name](llm) + if callback_manager is not None: + tool.callback_manager = callback_manager + tools.append(tool) + elif name in _EXTRA_LLM_TOOLS: + if llm is None: + raise ValueError(f"Tool {name} requires an LLM to be provided") + _get_llm_tool_func, extra_keys = _EXTRA_LLM_TOOLS[name] + missing_keys = set(extra_keys).difference(kwargs) + if missing_keys: + raise ValueError( + f"Tool {name} requires some parameters that were not " + f"provided: {missing_keys}" + ) + sub_kwargs = {k: kwargs[k] for k in extra_keys} + tool = _get_llm_tool_func(llm=llm, **sub_kwargs) + if callback_manager is not None: + tool.callback_manager = callback_manager + tools.append(tool) + elif name in _EXTRA_OPTIONAL_TOOLS: + _get_tool_func, extra_keys = _EXTRA_OPTIONAL_TOOLS[name] + sub_kwargs = {k: kwargs[k] for k in extra_keys if k in kwargs} + tool = _get_tool_func(**sub_kwargs) + if callback_manager is not None: + tool.callback_manager = callback_manager + tools.append(tool) + else: + raise ValueError(f"Got unknown tool {name}") + return tools + + +def get_all_tool_names() -> List[str]: + """Get a list of all possible tool names.""" + return ( + list(_BASE_TOOLS) + + list(_EXTRA_OPTIONAL_TOOLS) + + list(_EXTRA_LLM_TOOLS) + + list(_LLM_TOOLS) + ) diff --git a/langchain/agents/loading.py b/langchain/agents/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..930ce925f4e3d7b7f8e1f107bbdb65f296fb61b8 --- /dev/null +++ b/langchain/agents/loading.py @@ -0,0 +1,111 @@ +"""Functionality for loading agents.""" +import json +from pathlib import Path +from typing import Any, List, Optional, Union + +import yaml + +from langchain.agents.agent import Agent +from langchain.agents.chat.base import ChatAgent +from langchain.agents.conversational.base import ConversationalAgent +from langchain.agents.conversational_chat.base import ConversationalChatAgent +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.agents.react.base import ReActDocstoreAgent +from langchain.agents.self_ask_with_search.base import SelfAskWithSearchAgent +from langchain.agents.tools import Tool +from langchain.chains.loading import load_chain, load_chain_from_config +from langchain.llms.base import BaseLLM +from langchain.utilities.loading import try_load_from_hub + +AGENT_TO_CLASS = { + "zero-shot-react-description": ZeroShotAgent, + "react-docstore": ReActDocstoreAgent, + "self-ask-with-search": SelfAskWithSearchAgent, + "conversational-react-description": ConversationalAgent, + "chat-zero-shot-react-description": ChatAgent, + "chat-conversational-react-description": ConversationalChatAgent, +} + +URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/agents/" + + +def _load_agent_from_tools( + config: dict, llm: BaseLLM, tools: List[Tool], **kwargs: Any +) -> Agent: + config_type = config.pop("_type") + if config_type not in AGENT_TO_CLASS: + raise ValueError(f"Loading {config_type} agent not supported") + + if config_type not in AGENT_TO_CLASS: + raise ValueError(f"Loading {config_type} agent not supported") + agent_cls = AGENT_TO_CLASS[config_type] + combined_config = {**config, **kwargs} + return agent_cls.from_llm_and_tools(llm, tools, **combined_config) + + +def load_agent_from_config( + config: dict, + llm: Optional[BaseLLM] = None, + tools: 
Optional[List[Tool]] = None, + **kwargs: Any, +) -> Agent: + """Load agent from Config Dict.""" + if "_type" not in config: + raise ValueError("Must specify an agent Type in config") + load_from_tools = config.pop("load_from_llm_and_tools", False) + if load_from_tools: + if llm is None: + raise ValueError( + "If `load_from_llm_and_tools` is set to True, " + "then LLM must be provided" + ) + if tools is None: + raise ValueError( + "If `load_from_llm_and_tools` is set to True, " + "then tools must be provided" + ) + return _load_agent_from_tools(config, llm, tools, **kwargs) + config_type = config.pop("_type") + + if config_type not in AGENT_TO_CLASS: + raise ValueError(f"Loading {config_type} agent not supported") + + agent_cls = AGENT_TO_CLASS[config_type] + if "llm_chain" in config: + config["llm_chain"] = load_chain_from_config(config.pop("llm_chain")) + elif "llm_chain_path" in config: + config["llm_chain"] = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` and `llm_chain_path` should be specified.") + combined_config = {**config, **kwargs} + return agent_cls(**combined_config) # type: ignore + + +def load_agent(path: Union[str, Path], **kwargs: Any) -> Agent: + """Unified method for loading a agent from LangChainHub or local fs.""" + if hub_result := try_load_from_hub( + path, _load_agent_from_file, "agents", {"json", "yaml"} + ): + return hub_result + else: + return _load_agent_from_file(path, **kwargs) + + +def _load_agent_from_file(file: Union[str, Path], **kwargs: Any) -> Agent: + """Load agent from file.""" + # Convert file to Path object. + if isinstance(file, str): + file_path = Path(file) + else: + file_path = file + # Load from either json or yaml. + if file_path.suffix == ".json": + with open(file_path) as f: + config = json.load(f) + elif file_path.suffix == ".yaml": + with open(file_path, "r") as f: + config = yaml.safe_load(f) + else: + raise ValueError("File type must be json or yaml") + # Load the agent from the config now. 
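A sketch of the config path above: a dict carrying `load_from_llm_and_tools` routes through `_load_agent_from_tools`, so the agent is rebuilt from a live LLM and tools rather than from a serialized `llm_chain`. Values here are illustrative; `OpenAI` is the assumed upstream wrapper.

```python
# Sketch: loading an agent from a config dict (illustrative values only).
from langchain.agents.loading import load_agent_from_config
from langchain.agents.tools import Tool
from langchain.llms import OpenAI  # assumed upstream LLM wrapper

llm = OpenAI(temperature=0)
tools = [Tool(name="Echo", func=lambda x: x, description="Returns its input.")]

config = {
    "_type": "zero-shot-react-description",
    "load_from_llm_and_tools": True,  # rebuild from llm + tools instead of llm_chain
}
agent = load_agent_from_config(config, llm=llm, tools=tools)
print(type(agent).__name__)  # ZeroShotAgent
```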
+ return load_agent_from_config(config, **kwargs) diff --git a/langchain/agents/mrkl/SQLbot.py b/langchain/agents/mrkl/SQLbot.py new file mode 100644 index 0000000000000000000000000000000000000000..0499ef827efa2450aacfdc6b12483d79d4d4c99c --- /dev/null +++ b/langchain/agents/mrkl/SQLbot.py @@ -0,0 +1,81 @@ +from langchain.agents.mrkl.base import ZeroShotAgent +from langchain.schema import AgentAction, AgentFinish +from langchain.agents.agent import AgentExecutor +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from langchain.input import get_color_mapping +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM +from langchain.tools.base import BaseTool +from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX +from langchain.agents.agent_toolkits.sql.toolkit import SimpleSQLDatabaseToolkit +from langchain.prompts import PromptTemplate +from langchain.output_parsers.pydantic import SQLOutput +from langchain.output_parsers import PydanticOutputParser + + +class SQLAgentExecutor(AgentExecutor): + sqlPad = [] + state = "" + """A MRKL chain that uses SQL to store data.""" + + def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]: + self.agent.prepare_for_new_call() + # Construct a mapping of tool name to tool for easy lookup + name_to_tool_map = {tool.name: tool for tool in self.tools} + # We construct a mapping from each tool to a color, used for logging. + color_mapping = get_color_mapping( + [tool.name for tool in self.tools], excluded_colors=["green"] + ) + intermediate_steps: List[Tuple[AgentAction, str]] = [] + # Let's start tracking the iterations the agent has gone through + iterations = 0 + # We now enter the agent loop (until it returns something). 
+ while self._should_continue(iterations): + next_step_output = self._take_next_step( + name_to_tool_map, color_mapping, inputs, intermediate_steps + ) + if isinstance(next_step_output, AgentFinish): + return self._return(next_step_output, intermediate_steps) + else: + agent_action, observation = next_step_output + if agent_action.tool == "query_sql_db": + self.sqlPad.append(agent_action.tool_input) + print(self.sqlPad) + + intermediate_steps.append(next_step_output) + iterations += 1 + output = self.agent.return_stopped_response( + self.early_stopping_method, intermediate_steps, **inputs + ) + return self._return(output, intermediate_steps) + + +class SQLZeroShotAgent(ZeroShotAgent): + @classmethod + def create_prompt( + cls, + tools: Sequence[BaseTool], + prefix: str = SQL_PREFIX, + suffix: str = SQL_SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + ) -> PromptTemplate: + tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools]) + tool_names = ", ".join([tool.name for tool in tools]) + format_instructions = format_instructions.format(tool_names=tool_names) + output_parser = PydanticOutputParser(pydantic_object=SQLOutput) + output_format_instructions = "{output_format_instructions}" + template = "\n\n".join([prefix, + tool_strings, + format_instructions, + output_format_instructions, + suffix]) + if input_variables is None: + input_variables = ["input", "agent_scratchpad"] + + return PromptTemplate(template=template, + output_parser=output_parser, + input_variables=input_variables, + partial_variables={"output_format_instructions": output_parser.get_format_instructions()}) diff --git a/langchain/agents/mrkl/__init__.py b/langchain/agents/mrkl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a86a5b510d195acb0c2a2c8b9a292a8926221eb1 --- /dev/null +++ b/langchain/agents/mrkl/__init__.py @@ -0,0 +1 @@ +"""Attempt to implement MRKL systems as described in arxiv.org/pdf/2205.00445.pdf.""" diff --git a/langchain/agents/mrkl/__pycache__/SQLbot.cpython-39.pyc b/langchain/agents/mrkl/__pycache__/SQLbot.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a8855d6079e90fba5d603913fb327ca192f0ad7 Binary files /dev/null and b/langchain/agents/mrkl/__pycache__/SQLbot.cpython-39.pyc differ diff --git a/langchain/agents/mrkl/__pycache__/__init__.cpython-39.pyc b/langchain/agents/mrkl/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ecf37318ccfa4a28deab2af697474f4d391c6a7 Binary files /dev/null and b/langchain/agents/mrkl/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/mrkl/__pycache__/base.cpython-39.pyc b/langchain/agents/mrkl/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccdf0ce235568ef6bb084eb47f52b1484414b63e Binary files /dev/null and b/langchain/agents/mrkl/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/mrkl/__pycache__/prompt.cpython-39.pyc b/langchain/agents/mrkl/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19b09a47fbef6421c36dc1f9b4261160bedc6b0c Binary files /dev/null and b/langchain/agents/mrkl/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/mrkl/base.py b/langchain/agents/mrkl/base.py new file mode 100644 index 0000000000000000000000000000000000000000..64f0ed71e66c53a115e2e5f249e4202e38907238 --- /dev/null +++ 
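The SQL prompt above injects the output parser's format instructions through `partial_variables`, so callers still only supply `input` and `agent_scratchpad`. A standalone sketch of that pattern; the `Answer` model is a hypothetical stand-in for `SQLOutput`, which is defined elsewhere in this PR.

```python
# Sketch of the partial_variables pattern used by SQLZeroShotAgent.create_prompt.
# `Answer` is a hypothetical stand-in for the SQLOutput model used above.
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field


class Answer(BaseModel):
    sql: str = Field(description="the SQL query that was executed")
    result: str = Field(description="the final answer to the user")


parser = PydanticOutputParser(pydantic_object=Answer)

prompt = PromptTemplate(
    template=(
        "Answer the question.\n{output_format_instructions}\n"
        "Question: {input}\n{agent_scratchpad}"
    ),
    input_variables=["input", "agent_scratchpad"],
    # Pre-filled, so .format() only needs the two input variables.
    partial_variables={"output_format_instructions": parser.get_format_instructions()},
)
print(prompt.format(input="How many users signed up today?", agent_scratchpad=""))
```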
b/langchain/agents/mrkl/base.py @@ -0,0 +1,203 @@ +"""Attempt to implement MRKL systems as described in arxiv.org/pdf/2205.00445.pdf.""" +from __future__ import annotations + +import re +from typing import Any, Callable, List, NamedTuple, Optional, Sequence, Tuple + +from langchain.agents.agent import Agent, AgentExecutor +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX +from langchain.agents.tools import Tool +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains import LLMChain +from langchain.llms.base import BaseLLM +from langchain.prompts import PromptTemplate +from langchain.tools.base import BaseTool + +FINAL_ANSWER_ACTION = "Final Answer:" + + +class ChainConfig(NamedTuple): + """Configuration for chain to use in MRKL system. + + Args: + action_name: Name of the action. + action: Action function to call. + action_description: Description of the action. + """ + + action_name: str + action: Callable + action_description: str + + +def get_action_and_input(llm_output: str) -> Tuple[str, str]: + """Parse out the action and input from the LLM output. + + Note: if you're specifying a custom prompt for the ZeroShotAgent, + you will need to ensure that it meets the following Regex requirements. + The string starting with "Action:" and the following string starting + with "Action Input:" should be separated by a newline. + """ + if FINAL_ANSWER_ACTION in llm_output: + return "Final Answer", llm_output.split(FINAL_ANSWER_ACTION)[-1].strip() + regex = r"Action: (.*?)[\n]*Action Input: (.*)" + match = re.search(regex, llm_output, re.DOTALL) + if not match: + raise ValueError(f"Could not parse LLM output: `{llm_output}`") + action = match.group(1).strip() + action_input = match.group(2) + return action, action_input.strip(" ").strip('"') + + +class ZeroShotAgent(Agent): + """Agent for the MRKL chain.""" + + @property + def _agent_type(self) -> str: + """Return Identifier of agent type.""" + return "zero-shot-react-description" + + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return "Observation: " + + @property + def llm_prefix(self) -> str: + """Prefix to append the llm call with.""" + return "Thought:" + + @classmethod + def create_prompt( + cls, + tools: Sequence[BaseTool], + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + ) -> PromptTemplate: + """Create prompt in the style of the zero shot agent. + + Args: + tools: List of tools the agent will have access to, used to format the + prompt. + prefix: String to put before the list of tools. + suffix: String to put after the list of tools. + input_variables: List of input variables the final prompt will expect. + + Returns: + A PromptTemplate with the template assembled from the pieces here. 
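A quick sketch of what the regex in `get_action_and_input` above accepts and returns; the text is a made-up LLM completion.

```python
# Sketch: the Action / Action Input format parsed by get_action_and_input().
from langchain.agents.mrkl.base import get_action_and_input

llm_output = (
    "Thought: I should look this up.\n"
    "Action: Search\n"
    "Action Input: population of Tokyo"
)
assert get_action_and_input(llm_output) == ("Search", "population of Tokyo")

# A "Final Answer:" anywhere in the text short-circuits to the finish action.
assert get_action_and_input("Final Answer: about 14 million")[0] == "Final Answer"
```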
+ """ + tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools]) + tool_names = ", ".join([tool.name for tool in tools]) + format_instructions = format_instructions.format(tool_names=tool_names) + template = "\n\n".join([prefix, tool_strings, format_instructions, suffix]) + if input_variables is None: + input_variables = ["input", "agent_scratchpad"] + return PromptTemplate(template=template, input_variables=input_variables) + + @classmethod + def from_llm_and_tools( + cls, + llm: BaseLLM, + tools: Sequence[BaseTool], + callback_manager: Optional[BaseCallbackManager] = None, + prefix: str = PREFIX, + suffix: str = SUFFIX, + format_instructions: str = FORMAT_INSTRUCTIONS, + input_variables: Optional[List[str]] = None, + **kwargs: Any, + ) -> Agent: + """Construct an agent from an LLM and tools.""" + cls._validate_tools(tools) + prompt = cls.create_prompt( + tools, + prefix=prefix, + suffix=suffix, + format_instructions=format_instructions, + input_variables=input_variables, + ) + llm_chain = LLMChain( + llm=llm, + prompt=prompt, + callback_manager=callback_manager, + ) + tool_names = [tool.name for tool in tools] + return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs) + + @classmethod + def _validate_tools(cls, tools: Sequence[BaseTool]) -> None: + for tool in tools: + if tool.description is None: + raise ValueError( + f"Got a tool {tool.name} without a description. For this agent, " + f"a description must always be provided." + ) + + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: + return get_action_and_input(text) + + +class MRKLChain(AgentExecutor): + """Chain that implements the MRKL system. + + Example: + .. code-block:: python + + from langchain import OpenAI, MRKLChain + from langchain.chains.mrkl.base import ChainConfig + llm = OpenAI(temperature=0) + prompt = PromptTemplate(...) + chains = [...] + mrkl = MRKLChain.from_chains(llm=llm, prompt=prompt) + """ + + @classmethod + def from_chains( + cls, llm: BaseLLM, chains: List[ChainConfig], **kwargs: Any + ) -> AgentExecutor: + """User friendly way to initialize the MRKL chain. + + This is intended to be an easy way to get up and running with the + MRKL chain. + + Args: + llm: The LLM to use as the agent LLM. + chains: The chains the MRKL system has access to. + **kwargs: parameters to be passed to initialization. + + Returns: + An initialized MRKL chain. + + Example: + .. 
code-block:: python + + from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, MRKLChain + from langchain.chains.mrkl.base import ChainConfig + llm = OpenAI(temperature=0) + search = SerpAPIWrapper() + llm_math_chain = LLMMathChain(llm=llm) + chains = [ + ChainConfig( + action_name = "Search", + action=search.search, + action_description="useful for searching" + ), + ChainConfig( + action_name="Calculator", + action=llm_math_chain.run, + action_description="useful for doing math" + ) + ] + mrkl = MRKLChain.from_chains(llm, chains) + """ + tools = [ + Tool( + name=c.action_name, + func=c.action, + description=c.action_description, + ) + for c in chains + ] + agent = ZeroShotAgent.from_llm_and_tools(llm, tools) + return cls(agent=agent, tools=tools, **kwargs) diff --git a/langchain/agents/mrkl/plan_and_action.py b/langchain/agents/mrkl/plan_and_action.py new file mode 100644 index 0000000000000000000000000000000000000000..0482333bde89bb09cdc1dcda8b013b34d366ceaa --- /dev/null +++ b/langchain/agents/mrkl/plan_and_action.py @@ -0,0 +1,26 @@ +EXECUTION_FORMAT_INSTRUCTIONS = """ +1. Use the following format to structure each question: + +Question: the input question you must answer +Thought: you should always think about what to do +Clarification: any clarifying questions you want to ask before deciding on an action, i.e. queries that would give you greater clarity or understanding of the input question +Confidence Level: how confident you are that you can answer the input question without the user answering the clarification questions above, as a float between 0 and 1, where 1 means 100% confident +Plan: you should always plan the list of things you need to do to answer the question + +Thought: you should always think about what to do +Action: the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action +... (this Thought/Action/Action Input/Observation can repeat N times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question +Please follow these guidelines: + +1. Ensure to only fill in Thought, Clarification, Confidence Level, Plan, Action, and Action Input. Do not fill in Observation. +2. Ensure both the action and the action input are included, and put Action Input on a new line. +3. Ensure to start the SQL query with "SELECT" immediately after "Action Input:". +4. Ensure to wait for Observation after Action Input. +5. Ensure not to fill in Observation yourself, it will be filled in automatically. +6. When the thought is "I now know the final answer", start a new line with the exact keyword "Final Answer:" and provide the answer. Do not add any further "Thought" or "Action" sections. + +""" diff --git a/langchain/agents/mrkl/prompt.py b/langchain/agents/mrkl/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..49a1303214ec79c014a03cabbb79243be4dfbb04 --- /dev/null +++ b/langchain/agents/mrkl/prompt.py @@ -0,0 +1,30 @@ +# flake8: noqa +PREFIX = """Answer the following questions as best you can. You have access to the following tools:""" +FORMAT_INSTRUCTIONS = """ +1. Use the following format to structure each question: + +Question: the input question you must answer +Thought: you should always think about what to do +Plan: you should always plan the list of things you need to do to answer the question +Action: the action to take, should be one of [{tool_names}] +Action Input: the input to the action +Observation: the result of the action +...
(this Thought/Plan/Action/Action Input/Observation can repeat N times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question +Please follow these guidelines: + +1. Ensure to only fill in Thought, Plan, Action, and Action Input. Do not fill in Observation. +2. Ensure to separate each of the Thought, Plan, Action, and Action Input sections with a new line. +3. Ensure to start each section with the exact keyword "Thought: ", "Plan: ", "Action: ", or "Action Input: ". +4. Ensure to start the SQL query with "SELECT" immediately after "Action Input: ". Do not add any further characters. +5. Ensure to wait for Observation after Action Input. +6. Ensure not to fill in Observation yourself, it will be filled in automatically. +7. When the thought is "I now know the final answer", start a new line with the exact keyword "Final Answer:" and provide the answer. Do not add any further "Thought" or "Action" sections. + +""" +SUFFIX = """Begin! + +Question: {input} +Thought:{agent_scratchpad}""" + diff --git a/langchain/agents/react/__init__.py b/langchain/agents/react/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..34518432c0225e5a67393e2ff12484288b926117 --- /dev/null +++ b/langchain/agents/react/__init__.py @@ -0,0 +1 @@ +"""Implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf.""" diff --git a/langchain/agents/react/__pycache__/__init__.cpython-39.pyc b/langchain/agents/react/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1565a9279d14876e0c6ddb0e904cf16e52e186f5 Binary files /dev/null and b/langchain/agents/react/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/react/__pycache__/base.cpython-39.pyc b/langchain/agents/react/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19ec64a237d0a2d4727fc33cf6c9d25fbb68abb3 Binary files /dev/null and b/langchain/agents/react/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/react/__pycache__/textworld_prompt.cpython-39.pyc b/langchain/agents/react/__pycache__/textworld_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3fa740e0232c948a879a60d1a201ce08d3a8c17 Binary files /dev/null and b/langchain/agents/react/__pycache__/textworld_prompt.cpython-39.pyc differ diff --git a/langchain/agents/react/__pycache__/wiki_prompt.cpython-39.pyc b/langchain/agents/react/__pycache__/wiki_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3988426d07dcd447d84c537d7f4456cc705ad23c Binary files /dev/null and b/langchain/agents/react/__pycache__/wiki_prompt.cpython-39.pyc differ diff --git a/langchain/agents/react/base.py b/langchain/agents/react/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a9d030dcf2a61c62060f51fa258b66517ca7b360 --- /dev/null +++ b/langchain/agents/react/base.py @@ -0,0 +1,151 @@ +"""Chain that implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf.""" +import re +from typing import Any, List, Optional, Sequence, Tuple + +from pydantic import BaseModel + +from langchain.agents.agent import Agent, AgentExecutor +from langchain.agents.react.textworld_prompt import TEXTWORLD_PROMPT +from langchain.agents.react.wiki_prompt import WIKI_PROMPT +from langchain.agents.tools import Tool +from langchain.docstore.base import Docstore +from langchain.docstore.document import Document +from langchain.llms.base import BaseLLM +from 
langchain.prompts.base import BasePromptTemplate +from langchain.tools.base import BaseTool + + +class ReActDocstoreAgent(Agent, BaseModel): + """Agent for the ReAct chain.""" + + @property + def _agent_type(self) -> str: + """Return Identifier of agent type.""" + return "react-docstore" + + @classmethod + def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate: + """Return default prompt.""" + return WIKI_PROMPT + + i: int = 1 + + @classmethod + def _validate_tools(cls, tools: Sequence[BaseTool]) -> None: + if len(tools) != 2: + raise ValueError(f"Exactly two tools must be specified, but got {tools}") + tool_names = {tool.name for tool in tools} + if tool_names != {"Lookup", "Search"}: + raise ValueError( + f"Tool names should be Lookup and Search, got {tool_names}" + ) + + def _prepare_for_new_call(self) -> None: + self.i = 1 + + def _fix_text(self, text: str) -> str: + return text + f"\nAction {self.i}:" + + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: + action_prefix = f"Action {self.i}: " + if not text.split("\n")[-1].startswith(action_prefix): + return None + self.i += 1 + action_block = text.split("\n")[-1] + + action_str = action_block[len(action_prefix) :] + # Parse out the action and the directive. + re_matches = re.search(r"(.*?)\[(.*?)\]", action_str) + if re_matches is None: + raise ValueError(f"Could not parse action directive: {action_str}") + return re_matches.group(1), re_matches.group(2) + + @property + def finish_tool_name(self) -> str: + """Name of the tool of when to finish the chain.""" + return "Finish" + + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return f"Observation {self.i - 1}: " + + @property + def _stop(self) -> List[str]: + return [f"\nObservation {self.i}:"] + + @property + def llm_prefix(self) -> str: + """Prefix to append the LLM call with.""" + return f"Thought {self.i}:" + + +class DocstoreExplorer: + """Class to assist with exploration of a document store.""" + + def __init__(self, docstore: Docstore): + """Initialize with a docstore, and set initial document to None.""" + self.docstore = docstore + self.document: Optional[Document] = None + + def search(self, term: str) -> str: + """Search for a term in the docstore, and if found save.""" + result = self.docstore.search(term) + if isinstance(result, Document): + self.document = result + return self.document.summary + else: + self.document = None + return result + + def lookup(self, term: str) -> str: + """Lookup a term in document (if saved).""" + if self.document is None: + raise ValueError("Cannot lookup without a successful search first") + return self.document.lookup(term) + + +class ReActTextWorldAgent(ReActDocstoreAgent, BaseModel): + """Agent for the ReAct TextWorld chain.""" + + @classmethod + def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate: + """Return default prompt.""" + return TEXTWORLD_PROMPT + + @classmethod + def _validate_tools(cls, tools: Sequence[BaseTool]) -> None: + if len(tools) != 1: + raise ValueError(f"Exactly one tool must be specified, but got {tools}") + tool_names = {tool.name for tool in tools} + if tool_names != {"Play"}: + raise ValueError(f"Tool name should be Play, got {tool_names}") + + +class ReActChain(AgentExecutor): + """Chain that implements the ReAct paper. + + Example: + .. 
code-block:: python + + from langchain import ReActChain, OpenAI + react = ReAct(llm=OpenAI()) + """ + + def __init__(self, llm: BaseLLM, docstore: Docstore, **kwargs: Any): + """Initialize with the LLM and a docstore.""" + docstore_explorer = DocstoreExplorer(docstore) + tools = [ + Tool( + name="Search", + func=docstore_explorer.search, + description="Search for a term in the docstore.", + ), + Tool( + name="Lookup", + func=docstore_explorer.lookup, + description="Lookup a term in the docstore.", + ), + ] + agent = ReActDocstoreAgent.from_llm_and_tools(llm, tools) + super().__init__(agent=agent, tools=tools, **kwargs) diff --git a/langchain/agents/react/textworld_prompt.py b/langchain/agents/react/textworld_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..b832a6bb6c6f54f05ef8b00caae6e1b8d5b1e424 --- /dev/null +++ b/langchain/agents/react/textworld_prompt.py @@ -0,0 +1,52 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +EXAMPLES = [ + """Setup: You are now playing a fast paced round of TextWorld! Here is your task for +today. First of all, you could, like, try to travel east. After that, take the +binder from the locker. With the binder, place the binder on the mantelpiece. +Alright, thanks! + +-= Vault =- +You've just walked into a vault. You begin to take stock of what's here. + +An open safe is here. What a letdown! The safe is empty! You make out a shelf. +But the thing hasn't got anything on it. What, you think everything in TextWorld +should have stuff on it? + +You don't like doors? Why not try going east, that entranceway is unguarded. + +Thought 1: I need to travel east +Action 1: Play[go east] +Observation 1: -= Office =- +You arrive in an office. An ordinary one. + +You can make out a locker. The locker contains a binder. You see a case. The +case is empty, what a horrible day! You lean against the wall, inadvertently +pressing a secret button. The wall opens up to reveal a mantelpiece. You wonder +idly who left that here. The mantelpiece is standard. The mantelpiece appears to +be empty. If you haven't noticed it already, there seems to be something there +by the wall, it's a table. Unfortunately, there isn't a thing on it. Hm. Oh well +There is an exit to the west. Don't worry, it is unguarded. + +Thought 2: I need to take the binder from the locker +Action 2: Play[take binder] +Observation 2: You take the binder from the locker. + +Thought 3: I need to place the binder on the mantelpiece +Action 3: Play[put binder on mantelpiece] + +Observation 3: You put the binder on the mantelpiece. +Your score has just gone up by one point. +*** The End *** +Thought 4: The End has occurred +Action 4: Finish[yes] + +""" +] +SUFFIX = """\n\nSetup: {input} +{agent_scratchpad}""" + +TEXTWORLD_PROMPT = PromptTemplate.from_examples( + EXAMPLES, SUFFIX, ["input", "agent_scratchpad"] +) diff --git a/langchain/agents/react/wiki_prompt.py b/langchain/agents/react/wiki_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..879a554ad3a09e9719085fbf59162a2d681d343b --- /dev/null +++ b/langchain/agents/react/wiki_prompt.py @@ -0,0 +1,115 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +EXAMPLES = [ + """Question: What is the elevation range for the area that the eastern sector of the +Colorado orogeny extends into? +Thought 1: I need to search Colorado orogeny, find the area that the eastern sector +of the Colorado orogeny extends into, then find the elevation range of the +area. 
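A usage sketch for the ReAct docstore pieces above. The `Wikipedia` docstore is assumed from upstream LangChain (it needs the `wikipedia` package) and is not part of this diff; the question is illustrative.

```python
# Sketch: DocstoreExplorer + ReActChain over the Wikipedia docstore.
from langchain.agents.react.base import DocstoreExplorer, ReActChain
from langchain.docstore.wikipedia import Wikipedia  # assumed upstream docstore
from langchain.llms import OpenAI                    # assumed upstream LLM wrapper

# DocstoreExplorer remembers the last successful search so Lookup has a target.
explorer = DocstoreExplorer(Wikipedia())
print(explorer.search("Colorado orogeny")[:200])
# explorer.lookup("eastern sector") only works after a successful search.

# ReActChain wires Search/Lookup tools around the same explorer internally.
react = ReActChain(llm=OpenAI(temperature=0), docstore=Wikipedia(), verbose=True)
react.run(
    "What is the elevation range for the area that the eastern sector of the "
    "Colorado orogeny extends into?"
)
```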
+Action 1: Search[Colorado orogeny] +Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in +Colorado and surrounding areas. +Thought 2: It does not mention the eastern sector. So I need to look up eastern +sector. +Action 2: Lookup[eastern sector] +Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called +the Central Plains orogeny. +Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I +need to search High Plains and find its elevation range. +Action 3: Search[High Plains] +Observation 3: High Plains refers to one of two distinct land regions +Thought 4: I need to instead search High Plains (United States). +Action 4: Search[High Plains (United States)] +Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the +High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 +m).[3] +Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer +is 1,800 to 7,000 ft. +Action 5: Finish[1,800 to 7,000 ft]""", + """Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" +character Milhouse, who Matt Groening named after who? +Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after +who. I only need to search Milhouse and find who it is named after. +Action 1: Search[Milhouse] +Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated +television series The Simpsons voiced by Pamela Hayden and created by Matt +Groening. +Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up +"named after". +Action 2: Lookup[named after] +Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose +middle name was Milhous. +Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is +Richard Nixon. +Action 3: Finish[Richard Nixon]""", + """Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The +Saimaa Gesture? +Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which +documentary is about Finnish rock groups. +Action 1: Search[Adam Clayton Powell] +Observation 1 Could not find [Adam Clayton Powell]. Similar: [’Adam Clayton Powell +III’, ’Seventh Avenue (Manhattan)’, ’Adam Clayton Powell Jr. State Office +Building’, ’Isabel Washington Powell’, ’Adam Powell’, ’Adam Clayton Powell +(film)’, ’Giancarlo Esposito’]. +Thought 2: To find the documentary, I can search Adam Clayton Powell (film). +Action 2: Search[Adam Clayton Powell (film)] +Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by +Richard Kilberg. The film is about the rise and fall of influential +African-American politician Adam Clayton Powell Jr.[3][4] It was later aired +as part of the PBS series The American Experience. +Thought 3: Adam Clayton Powell (film) is a documentary about an African-American +politician, not Finnish rock groups. So the documentary about Finnish rock +groups must instead be The Saimaa Gesture. +Action 3: Finish[The Saimaa Gesture]""", + """Question: What profession does Nicholas Ray and Elia Kazan have in common? +Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then +find the profession they have in common. 
+Action 1: Search[Nicholas Ray] +Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, +1979) was an American film director, screenwriter, and actor best known for +the 1955 film Rebel Without a Cause. +Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need +to search Elia Kazan next and find his professions. +Action 2: Search[Elia Kazan] +Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter +and actor. +Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. +So profession Nicholas Ray and Elia Kazan have in common is director, +screenwriter, and actor. +Action 3: Finish[director, screenwriter, actor]""", + """Question: Which magazine was started first Arthur’s Magazine or First for Women? +Thought 1: I need to search Arthur’s Magazine and First for Women, and find which was +started first. +Action 1: Search[Arthur’s Magazine] +Observation 1: Arthur’s Magazine (1844-1846) was an American literary periodical published +in Philadelphia in the 19th century. +Thought 2: Arthur’s Magazine was started in 1844. I need to search First for Women +next. +Action 2: Search[First for Women] +Observation 2: First for Women is a woman’s magazine published by Bauer Media Group in the +USA.[1] The magazine was started in 1989. +Thought 3: First for Women was started in 1989. 1844 (Arthur’s Magazine) < 1989 (First +for Women), so Arthur’s Magazine was started first. +Action 3: Finish[Arthur’s Magazine]""", + """Question: Were Pavel Urysohn and Leonid Levin known for the same type of work? +Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, +then find if they are the same. +Action 1: Search[Pavel Urysohn] +Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet +mathematician who is best known for his contributions in dimension theory. +Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and +find its type of work. +Action 2: Search[Leonid Levin] +Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer +scientist. +Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn +and Leonid Levin have the same type of work. +Action 3: Finish[yes]""", +] +SUFFIX = """\nQuestion: {input} +{agent_scratchpad}""" + +WIKI_PROMPT = PromptTemplate.from_examples( + EXAMPLES, SUFFIX, ["input", "agent_scratchpad"] +) diff --git a/langchain/agents/self_ask_with_search/__init__.py b/langchain/agents/self_ask_with_search/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..70a450ac3e6242687a1a3435878ceebacdd17108 --- /dev/null +++ b/langchain/agents/self_ask_with_search/__init__.py @@ -0,0 +1,4 @@ +"""Chain that does self ask with search. 
+ +Heavily borrowed from https://github.com/ofirpress/self-ask +""" diff --git a/langchain/agents/self_ask_with_search/__pycache__/__init__.cpython-39.pyc b/langchain/agents/self_ask_with_search/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10a08c3365739023906418fa5e2a209b7673763a Binary files /dev/null and b/langchain/agents/self_ask_with_search/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/agents/self_ask_with_search/__pycache__/base.cpython-39.pyc b/langchain/agents/self_ask_with_search/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac21a2e1617829492cc22912f1ed221ea6cef8fa Binary files /dev/null and b/langchain/agents/self_ask_with_search/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/agents/self_ask_with_search/__pycache__/prompt.cpython-39.pyc b/langchain/agents/self_ask_with_search/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51e2676b4e269c192d9ca4c3ae51201a447818ff Binary files /dev/null and b/langchain/agents/self_ask_with_search/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/agents/self_ask_with_search/base.py b/langchain/agents/self_ask_with_search/base.py new file mode 100644 index 0000000000000000000000000000000000000000..694273e3bdf9da49dd0021ccc0f8f689c980bb98 --- /dev/null +++ b/langchain/agents/self_ask_with_search/base.py @@ -0,0 +1,95 @@ +"""Chain that does self ask with search.""" +from typing import Any, Optional, Sequence, Tuple, Union + +from langchain.agents.agent import Agent, AgentExecutor +from langchain.agents.self_ask_with_search.prompt import PROMPT +from langchain.agents.tools import Tool +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate +from langchain.tools.base import BaseTool +from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.serpapi import SerpAPIWrapper + + +class SelfAskWithSearchAgent(Agent): + """Agent for the self-ask-with-search paper.""" + + @property + def _agent_type(self) -> str: + """Return Identifier of agent type.""" + return "self-ask-with-search" + + @classmethod + def create_prompt(cls, tools: Sequence[BaseTool]) -> BasePromptTemplate: + """Prompt does not depend on tools.""" + return PROMPT + + @classmethod + def _validate_tools(cls, tools: Sequence[BaseTool]) -> None: + if len(tools) != 1: + raise ValueError(f"Exactly one tool must be specified, but got {tools}") + tool_names = {tool.name for tool in tools} + if tool_names != {"Intermediate Answer"}: + raise ValueError( + f"Tool name should be Intermediate Answer, got {tool_names}" + ) + + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: + followup = "Follow up:" + last_line = text.split("\n")[-1] + + if followup not in last_line: + finish_string = "So the final answer is: " + if finish_string not in last_line: + return None + return "Final Answer", last_line[len(finish_string) :] + + after_colon = text.split(":")[-1] + + if " " == after_colon[0]: + after_colon = after_colon[1:] + + return "Intermediate Answer", after_colon + + def _fix_text(self, text: str) -> str: + return f"{text}\nSo the final answer is:" + + @property + def observation_prefix(self) -> str: + """Prefix to append the observation with.""" + return "Intermediate answer: " + + @property + def llm_prefix(self) -> str: + """Prefix to append the LLM call with.""" + return "" + + @property + 
def starter_string(self) -> str: + """Put this string after user input but before first LLM call.""" + return "Are follow up questions needed here:" + + +class SelfAskWithSearchChain(AgentExecutor): + """Chain that does self ask with search. + + Example: + .. code-block:: python + + from langchain import SelfAskWithSearchChain, OpenAI, GoogleSerperAPIWrapper + search_chain = GoogleSerperAPIWrapper() + self_ask = SelfAskWithSearchChain(llm=OpenAI(), search_chain=search_chain) + """ + + def __init__( + self, + llm: BaseLLM, + search_chain: Union[GoogleSerperAPIWrapper, SerpAPIWrapper], + **kwargs: Any, + ): + """Initialize with just an LLM and a search chain.""" + search_tool = Tool( + name="Intermediate Answer", func=search_chain.run, description="Search" + ) + agent = SelfAskWithSearchAgent.from_llm_and_tools(llm, [search_tool]) + super().__init__(agent=agent, tools=[search_tool], **kwargs) diff --git a/langchain/agents/self_ask_with_search/prompt.py b/langchain/agents/self_ask_with_search/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..c82de28dfbe6a2bab89dd013d72b024b693b2d10 --- /dev/null +++ b/langchain/agents/self_ask_with_search/prompt.py @@ -0,0 +1,44 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_TEMPLATE = """Question: Who lived longer, Muhammad Ali or Alan Turing? +Are follow up questions needed here: Yes. +Follow up: How old was Muhammad Ali when he died? +Intermediate answer: Muhammad Ali was 74 years old when he died. +Follow up: How old was Alan Turing when he died? +Intermediate answer: Alan Turing was 41 years old when he died. +So the final answer is: Muhammad Ali + +Question: When was the founder of craigslist born? +Are follow up questions needed here: Yes. +Follow up: Who was the founder of craigslist? +Intermediate answer: Craigslist was founded by Craig Newmark. +Follow up: When was Craig Newmark born? +Intermediate answer: Craig Newmark was born on December 6, 1952. +So the final answer is: December 6, 1952 + +Question: Who was the maternal grandfather of George Washington? +Are follow up questions needed here: Yes. +Follow up: Who was the mother of George Washington? +Intermediate answer: The mother of George Washington was Mary Ball Washington. +Follow up: Who was the father of Mary Ball Washington? +Intermediate answer: The father of Mary Ball Washington was Joseph Ball. +So the final answer is: Joseph Ball + +Question: Are both the directors of Jaws and Casino Royale from the same country? +Are follow up questions needed here: Yes. +Follow up: Who is the director of Jaws? +Intermediate answer: The director of Jaws is Steven Spielberg. +Follow up: Where is Steven Spielberg from? +Intermediate answer: The United States. +Follow up: Who is the director of Casino Royale? +Intermediate answer: The director of Casino Royale is Martin Campbell. +Follow up: Where is Martin Campbell from? +Intermediate answer: New Zealand. 
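For completeness, a sketch of the chain above using the SerpAPI wrapper variant of the search tool; it assumes a `SERPAPI_API_KEY` and the SerpAPI client package are available, and the question is illustrative.

```python
# Sketch: self-ask-with-search with the SerpAPI wrapper instead of Google Serper.
from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.llms import OpenAI                      # assumed upstream LLM wrapper
from langchain.utilities.serpapi import SerpAPIWrapper  # needs SERPAPI_API_KEY

search = SerpAPIWrapper()
self_ask = SelfAskWithSearchChain(
    llm=OpenAI(temperature=0),
    search_chain=search,  # exposed to the agent as the "Intermediate Answer" tool
    verbose=True,
)
self_ask.run("What is the hometown of the reigning men's U.S. Open champion?")
```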
+So the final answer is: No + +Question: {input} +Are followup questions needed here:{agent_scratchpad}""" +PROMPT = PromptTemplate( + input_variables=["input", "agent_scratchpad"], template=_DEFAULT_TEMPLATE +) diff --git a/langchain/agents/tools.py b/langchain/agents/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..99e9b741f9f77421765f1ca27e5a3168813c52fe --- /dev/null +++ b/langchain/agents/tools.py @@ -0,0 +1,103 @@ +"""Interface for tools.""" +from inspect import signature +from typing import Any, Awaitable, Callable, Optional, Union + +from langchain.tools.base import BaseTool + + +class Tool(BaseTool): + """Tool that takes in function or coroutine directly.""" + + description: str = "" + func: Callable[[str], str] + coroutine: Optional[Callable[[str], Awaitable[str]]] = None + + def _run(self, tool_input: str) -> str: + """Use the tool.""" + return self.func(tool_input) + + async def _arun(self, tool_input: str) -> str: + """Use the tool asynchronously.""" + if self.coroutine: + return await self.coroutine(tool_input) + raise NotImplementedError("Tool does not support async") + + # TODO: this is for backwards compatibility, remove in future + def __init__( + self, name: str, func: Callable[[str], str], description: str, **kwargs: Any + ) -> None: + """Initialize tool.""" + super(Tool, self).__init__( + name=name, func=func, description=description, **kwargs + ) + + +class InvalidTool(BaseTool): + """Tool that is run when invalid tool name is encountered by agent.""" + + name = "invalid_tool" + description = "Called when tool name is invalid." + + def _run(self, tool_name: str) -> str: + """Use the tool.""" + return f"{tool_name} is not a valid tool, try another one." + + async def _arun(self, tool_name: str) -> str: + """Use the tool asynchronously.""" + return f"{tool_name} is not a valid tool, try another one." + + +def tool(*args: Union[str, Callable], return_direct: bool = False) -> Callable: + """Make tools out of functions, can be used with or without arguments. + + Requires: + - Function must be of type (str) -> str + - Function must have a docstring + + Examples: + .. code-block:: python + + @tool + def search_api(query: str) -> str: + # Searches the API for the query. + return + + @tool("search", return_direct=True) + def search_api(query: str) -> str: + # Searches the API for the query. + return + """ + + def _make_with_name(tool_name: str) -> Callable: + def _make_tool(func: Callable[[str], str]) -> Tool: + assert func.__doc__, "Function must have a docstring" + # Description example: + # search_api(query: str) - Searches the API for the query. 
+ description = f"{tool_name}{signature(func)} - {func.__doc__.strip()}" + tool_ = Tool( + name=tool_name, + func=func, + description=description, + return_direct=return_direct, + ) + return tool_ + + return _make_tool + + if len(args) == 1 and isinstance(args[0], str): + # if the argument is a string, then we use the string as the tool name + # Example usage: @tool("search", return_direct=True) + return _make_with_name(args[0]) + elif len(args) == 1 and callable(args[0]): + # if the argument is a function, then we use the function name as the tool name + # Example usage: @tool + return _make_with_name(args[0].__name__)(args[0]) + elif len(args) == 0: + # if there are no arguments, then we use the function name as the tool name + # Example usage: @tool(return_direct=True) + def _partial(func: Callable[[str], str]) -> BaseTool: + return _make_with_name(func.__name__)(func) + + return _partial + else: + raise ValueError("Too many arguments for tool decorator") diff --git a/langchain/cache.py b/langchain/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..3d7149d91f1f3f24894b3fa898c7abbc39c66a5d --- /dev/null +++ b/langchain/cache.py @@ -0,0 +1,139 @@ +"""Beta Feature: base interface for cache.""" +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple + +from sqlalchemy import Column, Integer, String, create_engine, select +from sqlalchemy.engine.base import Engine +from sqlalchemy.orm import Session + +try: + from sqlalchemy.orm import declarative_base +except ImportError: + from sqlalchemy.ext.declarative import declarative_base + +from langchain.schema import Generation + +RETURN_VAL_TYPE = List[Generation] + + +class BaseCache(ABC): + """Base interface for cache.""" + + @abstractmethod + def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt and llm_string.""" + + @abstractmethod + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Update cache based on prompt and llm_string.""" + + +class InMemoryCache(BaseCache): + """Cache that stores things in memory.""" + + def __init__(self) -> None: + """Initialize with empty cache.""" + self._cache: Dict[Tuple[str, str], RETURN_VAL_TYPE] = {} + + def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt and llm_string.""" + return self._cache.get((prompt, llm_string), None) + + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Update cache based on prompt and llm_string.""" + self._cache[(prompt, llm_string)] = return_val + + +Base = declarative_base() + + +class FullLLMCache(Base): # type: ignore + """SQLite table for full LLM Cache (all generations).""" + + __tablename__ = "full_llm_cache" + prompt = Column(String, primary_key=True) + llm = Column(String, primary_key=True) + idx = Column(Integer, primary_key=True) + response = Column(String) + + +class SQLAlchemyCache(BaseCache): + """Cache that uses SQAlchemy as a backend.""" + + def __init__(self, engine: Engine, cache_schema: Any = FullLLMCache): + """Initialize by creating all tables.""" + self.engine = engine + self.cache_schema = cache_schema + self.cache_schema.metadata.create_all(self.engine) + + def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt and llm_string.""" + stmt = ( + select(self.cache_schema.response) + .where(self.cache_schema.prompt == prompt) + .where(self.cache_schema.llm == 
llm_string) + .order_by(self.cache_schema.idx) + ) + with Session(self.engine) as session: + generations = [Generation(text=row[0]) for row in session.execute(stmt)] + if len(generations) > 0: + return generations + return None + + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Look up based on prompt and llm_string.""" + for i, generation in enumerate(return_val): + item = self.cache_schema( + prompt=prompt, llm=llm_string, response=generation.text, idx=i + ) + with Session(self.engine) as session, session.begin(): + session.merge(item) + + +class SQLiteCache(SQLAlchemyCache): + """Cache that uses SQLite as a backend.""" + + def __init__(self, database_path: str = ".langchain.db"): + """Initialize by creating the engine and all tables.""" + engine = create_engine(f"sqlite:///{database_path}") + super().__init__(engine) + + +class RedisCache(BaseCache): + """Cache that uses Redis as a backend.""" + + def __init__(self, redis_: Any): + """Initialize by passing in Redis instance.""" + try: + from redis import Redis + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + if not isinstance(redis_, Redis): + raise ValueError("Please pass in Redis object.") + self.redis = redis_ + + def _key(self, prompt: str, llm_string: str, idx: int) -> str: + """Compute key from prompt, llm_string, and idx.""" + return str(hash(prompt + llm_string)) + "_" + str(idx) + + def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: + """Look up based on prompt and llm_string.""" + idx = 0 + generations = [] + while self.redis.get(self._key(prompt, llm_string, idx)): + result = self.redis.get(self._key(prompt, llm_string, idx)) + if not result: + break + elif isinstance(result, bytes): + result = result.decode() + generations.append(Generation(text=result)) + idx += 1 + return generations if generations else None + + def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: + """Update cache based on prompt and llm_string.""" + for i, generation in enumerate(return_val): + self.redis.set(self._key(prompt, llm_string, i), generation.text) diff --git a/langchain/callbacks/__init__.py b/langchain/callbacks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8796409475f8dbced4379a84ccfe0af182a13698 --- /dev/null +++ b/langchain/callbacks/__init__.py @@ -0,0 +1,79 @@ +"""Callback handlers that allow listening to events in LangChain.""" +import os +from contextlib import contextmanager +from typing import Generator, Optional + +from langchain.callbacks.base import ( + BaseCallbackHandler, + BaseCallbackManager, + CallbackManager, +) +from langchain.callbacks.openai_info import OpenAICallbackHandler +from langchain.callbacks.shared import SharedCallbackManager +from langchain.callbacks.stdout import StdOutCallbackHandler +from langchain.callbacks.tracers import SharedLangChainTracer +from langchain.callbacks.wandb_callback import WandbCallbackHandler + + +def get_callback_manager() -> BaseCallbackManager: + """Return the shared callback manager.""" + return SharedCallbackManager() + + +def set_handler(handler: BaseCallbackHandler) -> None: + """Set handler.""" + callback = get_callback_manager() + callback.set_handler(handler) + + +def set_default_callback_manager() -> None: + """Set default callback manager.""" + default_handler = os.environ.get("LANGCHAIN_HANDLER", "stdout") + if default_handler == "stdout": + 
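+        # LANGCHAIN_HANDLER defaults to "stdout", so events are simply printed
+        # unless tracing is explicitly requested via the environment.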
set_handler(StdOutCallbackHandler()) + elif default_handler == "langchain": + session = os.environ.get("LANGCHAIN_SESSION") + set_tracing_callback_manager(session) + else: + raise ValueError( + f"LANGCHAIN_HANDLER should be one of `stdout` " + f"or `langchain`, got {default_handler}" + ) + + +def set_tracing_callback_manager(session_name: Optional[str] = None) -> None: + """Set tracing callback manager.""" + handler = SharedLangChainTracer() + callback = get_callback_manager() + callback.set_handlers([handler, StdOutCallbackHandler()]) + if session_name is None: + handler.load_default_session() + else: + try: + handler.load_session(session_name) + except Exception: + raise ValueError(f"session {session_name} not found") + + +@contextmanager +def get_openai_callback() -> Generator[OpenAICallbackHandler, None, None]: + """Get OpenAI callback handler in a context manager.""" + handler = OpenAICallbackHandler() + manager = get_callback_manager() + manager.add_handler(handler) + yield handler + manager.remove_handler(handler) + + +__all__ = [ + "CallbackManager", + "OpenAICallbackHandler", + "SharedCallbackManager", + "StdOutCallbackHandler", + "WandbCallbackHandler", + "get_openai_callback", + "set_tracing_callback_manager", + "set_default_callback_manager", + "set_handler", + "get_callback_manager", +] diff --git a/langchain/callbacks/__pycache__/__init__.cpython-39.pyc b/langchain/callbacks/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30fefff2637254ae13801670fb0511ad52cc2117 Binary files /dev/null and b/langchain/callbacks/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/callbacks/__pycache__/base.cpython-39.pyc b/langchain/callbacks/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d77c4a7e75ebee7066a6cec5a8b981dba8fa1ab7 Binary files /dev/null and b/langchain/callbacks/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/callbacks/__pycache__/openai_info.cpython-39.pyc b/langchain/callbacks/__pycache__/openai_info.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3574df9c05733e7a96e053b2a62e43dbf21bbc3 Binary files /dev/null and b/langchain/callbacks/__pycache__/openai_info.cpython-39.pyc differ diff --git a/langchain/callbacks/__pycache__/shared.cpython-39.pyc b/langchain/callbacks/__pycache__/shared.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1528e893a67fd3464d009fd5c392fea4c4df6e15 Binary files /dev/null and b/langchain/callbacks/__pycache__/shared.cpython-39.pyc differ diff --git a/langchain/callbacks/__pycache__/stdout.cpython-39.pyc b/langchain/callbacks/__pycache__/stdout.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ccd299b260b8df61292f74f2a7ee851cc31f4c5 Binary files /dev/null and b/langchain/callbacks/__pycache__/stdout.cpython-39.pyc differ diff --git a/langchain/callbacks/__pycache__/wandb_callback.cpython-39.pyc b/langchain/callbacks/__pycache__/wandb_callback.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b4699d2df469f3074c9a22ba4e65e8d7cb289d5 Binary files /dev/null and b/langchain/callbacks/__pycache__/wandb_callback.cpython-39.pyc differ diff --git a/langchain/callbacks/base.py b/langchain/callbacks/base.py new file mode 100644 index 0000000000000000000000000000000000000000..0e5ebb9ded90d2c49cc673d6f56e4c076c8cd0b5 --- /dev/null +++ b/langchain/callbacks/base.py @@ -0,0 +1,580 @@ +"""Base 
callback handler that can be used to handle callbacks from langchain.""" +import asyncio +import functools +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Union + +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class BaseCallbackHandler(ABC): + """Base callback handler that can be used to handle callbacks from langchain.""" + + @property + def always_verbose(self) -> bool: + """Whether to call verbose callbacks even if verbose is False.""" + return False + + @property + def ignore_llm(self) -> bool: + """Whether to ignore LLM callbacks.""" + return False + + @property + def ignore_chain(self) -> bool: + """Whether to ignore chain callbacks.""" + return False + + @property + def ignore_agent(self) -> bool: + """Whether to ignore agent callbacks.""" + return False + + @abstractmethod + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> Any: + """Run when LLM starts running.""" + + @abstractmethod + def on_llm_new_token(self, token: str, **kwargs: Any) -> Any: + """Run on new LLM token. Only available when streaming is enabled.""" + + @abstractmethod + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any: + """Run when LLM ends running.""" + + @abstractmethod + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> Any: + """Run when LLM errors.""" + + @abstractmethod + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> Any: + """Run when chain starts running.""" + + @abstractmethod + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any: + """Run when chain ends running.""" + + @abstractmethod + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> Any: + """Run when chain errors.""" + + @abstractmethod + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> Any: + """Run when tool starts running.""" + + @abstractmethod + def on_tool_end(self, output: str, **kwargs: Any) -> Any: + """Run when tool ends running.""" + + @abstractmethod + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> Any: + """Run when tool errors.""" + + @abstractmethod + def on_text(self, text: str, **kwargs: Any) -> Any: + """Run on arbitrary text.""" + + @abstractmethod + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + + @abstractmethod + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any: + """Run on agent end.""" + + +class BaseCallbackManager(BaseCallbackHandler, ABC): + """Base callback manager that can be used to handle callbacks from LangChain.""" + + @property + def is_async(self) -> bool: + """Whether the callback manager is async.""" + return False + + @abstractmethod + def add_handler(self, callback: BaseCallbackHandler) -> None: + """Add a handler to the callback manager.""" + + @abstractmethod + def remove_handler(self, handler: BaseCallbackHandler) -> None: + """Remove a handler from the callback manager.""" + + def set_handler(self, handler: BaseCallbackHandler) -> None: + """Set handler as the only handler on the callback manager.""" + self.set_handlers([handler]) + + @abstractmethod + def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None: + """Set handlers as the only handlers on the callback manager.""" + + def on_agent_clarify(self, output, color, verbose): + pass + + +class 
CallbackManager(BaseCallbackManager): + """Callback manager that can be used to handle callbacks from langchain.""" + + def __init__(self, handlers: List[BaseCallbackHandler]) -> None: + """Initialize callback manager.""" + self.handlers: List[BaseCallbackHandler] = handlers + + def on_llm_start( + self, + serialized: Dict[str, Any], + prompts: List[str], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + handler.on_llm_start(serialized, prompts, **kwargs) + + def on_llm_new_token( + self, token: str, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when LLM generates a new token.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + handler.on_llm_new_token(token, **kwargs) + + def on_llm_end( + self, response: LLMResult, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when LLM ends running.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + handler.on_llm_end(response) + + def on_llm_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when LLM errors.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + handler.on_llm_error(error) + + def on_chain_start( + self, + serialized: Dict[str, Any], + inputs: Dict[str, Any], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when chain starts running.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + handler.on_chain_start(serialized, inputs, **kwargs) + + def on_chain_end( + self, outputs: Dict[str, Any], verbose: bool = False, **kwargs: Any + ) -> None: + """Run when chain ends running.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + handler.on_chain_end(outputs) + + def on_chain_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when chain errors.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + handler.on_chain_error(error) + + def on_tool_start( + self, + serialized: Dict[str, Any], + input_str: str, + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when tool starts running.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + handler.on_tool_start(serialized, input_str, **kwargs) + + def on_agent_action( + self, action: AgentAction, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + handler.on_agent_action(action, **kwargs) + + def on_tool_end(self, output: str, verbose: bool = False, **kwargs: Any) -> None: + """Run when tool ends running.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + handler.on_tool_end(output, **kwargs) + + def on_tool_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when tool errors.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + handler.on_tool_error(error) + + def 
on_text(self, text: str, verbose: bool = False, **kwargs: Any) -> None: + """Run on additional input from chains and agents.""" + for handler in self.handlers: + if verbose or handler.always_verbose: + handler.on_text(text, **kwargs) + + def on_agent_finish( + self, finish: AgentFinish, verbose: bool = False, **kwargs: Any + ) -> None: + """Run on agent end.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + handler.on_agent_finish(finish, **kwargs) + + def add_handler(self, handler: BaseCallbackHandler) -> None: + """Add a handler to the callback manager.""" + self.handlers.append(handler) + + def remove_handler(self, handler: BaseCallbackHandler) -> None: + """Remove a handler from the callback manager.""" + self.handlers.remove(handler) + + def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None: + """Set handlers as the only handlers on the callback manager.""" + self.handlers = handlers + + +class AsyncCallbackHandler(BaseCallbackHandler): + """Async callback handler that can be used to handle callbacks from langchain.""" + + async def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + + async def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run on new LLM token. Only available when streaming is enabled.""" + + async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + + async def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when LLM errors.""" + + async def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + + async def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + + async def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + + async def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + + async def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + + async def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + + async def on_text(self, text: str, **kwargs: Any) -> None: + """Run on arbitrary text.""" + + async def on_agent_action(self, action: AgentAction, **kwargs: Any) -> None: + """Run on agent action.""" + + async def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run on agent end.""" + + +class AsyncCallbackManager(BaseCallbackManager): + """Async callback manager that can be used to handle callbacks from LangChain.""" + + @property + def is_async(self) -> bool: + """Return whether the handler is async.""" + return True + + def __init__(self, handlers: List[BaseCallbackHandler]) -> None: + """Initialize callback manager.""" + self.handlers: List[BaseCallbackHandler] = handlers + + async def on_llm_start( + self, + serialized: Dict[str, Any], + prompts: List[str], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_llm_start): + await handler.on_llm_start(serialized, prompts, 
**kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_llm_start, serialized, prompts, **kwargs + ), + ) + + async def on_llm_new_token( + self, token: str, verbose: bool = False, **kwargs: Any + ) -> None: + """Run on new LLM token. Only available when streaming is enabled.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_llm_new_token): + await handler.on_llm_new_token(token, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_llm_new_token, token, **kwargs + ), + ) + + async def on_llm_end( + self, response: LLMResult, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when LLM ends running.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_llm_end): + await handler.on_llm_end(response, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_llm_end, response, **kwargs), + ) + + async def on_llm_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when LLM errors.""" + for handler in self.handlers: + if not handler.ignore_llm: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_llm_error): + await handler.on_llm_error(error, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_llm_error, error, **kwargs), + ) + + async def on_chain_start( + self, + serialized: Dict[str, Any], + inputs: Dict[str, Any], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when chain starts running.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_chain_start): + await handler.on_chain_start(serialized, inputs, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_chain_start, serialized, inputs, **kwargs + ), + ) + + async def on_chain_end( + self, outputs: Dict[str, Any], verbose: bool = False, **kwargs: Any + ) -> None: + """Run when chain ends running.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_chain_end): + await handler.on_chain_end(outputs, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_chain_end, outputs, **kwargs), + ) + + async def on_chain_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when chain errors.""" + for handler in self.handlers: + if not handler.ignore_chain: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_chain_error): + await handler.on_chain_error(error, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_chain_error, error, **kwargs), + ) + + async def on_tool_start( + self, + serialized: Dict[str, Any], + input_str: str, + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when tool starts running.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_tool_start): + 
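+                        # Coroutine handlers are awaited directly; plain sync
+                        # handlers are run in the default executor below.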
await handler.on_tool_start(serialized, input_str, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_tool_start, serialized, input_str, **kwargs + ), + ) + + async def on_tool_end( + self, output: str, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when tool ends running.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_tool_end): + await handler.on_tool_end(output, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_tool_end, output, **kwargs), + ) + + async def on_tool_error( + self, + error: Union[Exception, KeyboardInterrupt], + verbose: bool = False, + **kwargs: Any + ) -> None: + """Run when tool errors.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_tool_error): + await handler.on_tool_error(error, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial(handler.on_tool_error, error, **kwargs), + ) + + async def on_text(self, text: str, verbose: bool = False, **kwargs: Any) -> None: + """Run when text is printed.""" + for handler in self.handlers: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_text): + await handler.on_text(text, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, functools.partial(handler.on_text, text, **kwargs) + ) + + async def on_agent_action( + self, action: AgentAction, verbose: bool = False, **kwargs: Any + ) -> None: + """Run on agent action.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_agent_action): + await handler.on_agent_action(action, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_agent_action, action, **kwargs + ), + ) + + async def on_agent_finish( + self, finish: AgentFinish, verbose: bool = False, **kwargs: Any + ) -> None: + """Run when agent finishes.""" + for handler in self.handlers: + if not handler.ignore_agent: + if verbose or handler.always_verbose: + if asyncio.iscoroutinefunction(handler.on_agent_finish): + await handler.on_agent_finish(finish, **kwargs) + else: + await asyncio.get_event_loop().run_in_executor( + None, + functools.partial( + handler.on_agent_finish, finish, **kwargs + ), + ) + + def add_handler(self, handler: BaseCallbackHandler) -> None: + """Add a handler to the callback manager.""" + self.handlers.append(handler) + + def remove_handler(self, handler: BaseCallbackHandler) -> None: + """Remove a handler from the callback manager.""" + self.handlers.remove(handler) + + def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None: + """Set handlers as the only handlers on the callback manager.""" + self.handlers = handlers diff --git a/langchain/callbacks/openai_info.py b/langchain/callbacks/openai_info.py new file mode 100644 index 0000000000000000000000000000000000000000..d8cebe585b49d8de9d98d4e8e4e5549aade359aa --- /dev/null +++ b/langchain/callbacks/openai_info.py @@ -0,0 +1,102 @@ +"""Callback Handler that prints to std out.""" +from typing import Any, Dict, List, Optional, Union + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class 
OpenAICallbackHandler(BaseCallbackHandler): + """Callback Handler that tracks OpenAI info.""" + + total_tokens: int = 0 + + @property + def always_verbose(self) -> bool: + """Whether to call verbose callbacks even if verbose is False.""" + return True + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Print out the prompts.""" + pass + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Print out the token.""" + pass + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Collect token usage.""" + if response.llm_output is not None: + if "token_usage" in response.llm_output: + token_usage = response.llm_output["token_usage"] + if "total_tokens" in token_usage: + self.total_tokens += token_usage["total_tokens"] + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Print out that we are entering a chain.""" + pass + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Print out that we finished a chain.""" + pass + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_tool_start( + self, + serialized: Dict[str, Any], + input_str: str, + **kwargs: Any, + ) -> None: + """Print out the log in specified color.""" + pass + + def on_tool_end( + self, + output: str, + color: Optional[str] = None, + observation_prefix: Optional[str] = None, + llm_prefix: Optional[str] = None, + **kwargs: Any, + ) -> None: + """If not the final action, print out observation.""" + pass + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_text( + self, + text: str, + color: Optional[str] = None, + end: str = "", + **kwargs: Optional[str], + ) -> None: + """Run when agent ends.""" + pass + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + pass + + def on_agent_finish( + self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any + ) -> None: + """Run on agent end.""" + pass diff --git a/langchain/callbacks/shared.py b/langchain/callbacks/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..225b183e6b342b51b087d3411440ba9dc16c9bea --- /dev/null +++ b/langchain/callbacks/shared.py @@ -0,0 +1,127 @@ +"""A shared CallbackManager.""" + +import threading +from typing import Any, Dict, List, Union + +from langchain.callbacks.base import ( + BaseCallbackHandler, + BaseCallbackManager, + CallbackManager, +) +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class Singleton: + """A thread-safe singleton class that can be inherited from.""" + + _instance = None + _lock = threading.Lock() + + def __new__(cls) -> Any: + """Create a new shared instance of the class.""" + if cls._instance is None: + with cls._lock: + # Another thread could have created the instance + # before we acquired the lock. So check that the + # instance is still nonexistent. 
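+                # (classic double-checked locking: test again while holding the lock)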
+ if not cls._instance: + cls._instance = super().__new__(cls) + return cls._instance + + +class SharedCallbackManager(Singleton, BaseCallbackManager): + """A thread-safe singleton CallbackManager.""" + + _callback_manager: CallbackManager = CallbackManager(handlers=[]) + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + with self._lock: + self._callback_manager.on_llm_start(serialized, prompts, **kwargs) + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + with self._lock: + self._callback_manager.on_llm_end(response, **kwargs) + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run when LLM generates a new token.""" + with self._lock: + self._callback_manager.on_llm_new_token(token, **kwargs) + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when LLM errors.""" + with self._lock: + self._callback_manager.on_llm_error(error, **kwargs) + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + with self._lock: + self._callback_manager.on_chain_start(serialized, inputs, **kwargs) + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + with self._lock: + self._callback_manager.on_chain_end(outputs, **kwargs) + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + with self._lock: + self._callback_manager.on_chain_error(error, **kwargs) + + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + with self._lock: + self._callback_manager.on_tool_start(serialized, input_str, **kwargs) + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + with self._lock: + self._callback_manager.on_agent_action(action, **kwargs) + + def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + with self._lock: + self._callback_manager.on_tool_end(output, **kwargs) + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + with self._lock: + self._callback_manager.on_tool_error(error, **kwargs) + + def on_text(self, text: str, **kwargs: Any) -> None: + """Run on arbitrary text.""" + with self._lock: + self._callback_manager.on_text(text, **kwargs) + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run on agent end.""" + with self._lock: + self._callback_manager.on_agent_finish(finish, **kwargs) + + def add_handler(self, callback: BaseCallbackHandler) -> None: + """Add a callback to the callback manager.""" + with self._lock: + self._callback_manager.add_handler(callback) + + def remove_handler(self, callback: BaseCallbackHandler) -> None: + """Remove a callback from the callback manager.""" + with self._lock: + self._callback_manager.remove_handler(callback) + + def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None: + """Set handlers as the only handlers on the callback manager.""" + with self._lock: + self._callback_manager.handlers = handlers diff --git a/langchain/callbacks/stdout.py b/langchain/callbacks/stdout.py new file mode 100644 index 
0000000000000000000000000000000000000000..70867367b5ed9527a4585bd6be4366d220cf3751 --- /dev/null +++ b/langchain/callbacks/stdout.py @@ -0,0 +1,101 @@ +"""Callback Handler that prints to std out.""" +from typing import Any, Dict, List, Optional, Union + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.input import print_text +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class StdOutCallbackHandler(BaseCallbackHandler): + """Callback Handler that prints to std out.""" + + def __init__(self, color: Optional[str] = None) -> None: + """Initialize callback handler.""" + self.color = color + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Print out the prompts.""" + pass + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Do nothing.""" + pass + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Do nothing.""" + pass + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Print out that we are entering a chain.""" + class_name = serialized["name"] + print(f"\n\n\033[1m> Entering new {class_name} chain...\033[0m") + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Print out that we finished a chain.""" + print("\n\033[1m> Finished chain.\033[0m") + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_tool_start( + self, + serialized: Dict[str, Any], + input_str: str, + **kwargs: Any, + ) -> None: + """Do nothing.""" + pass + + def on_agent_action( + self, action: AgentAction, color: Optional[str] = None, **kwargs: Any + ) -> Any: + """Run on agent action.""" + print_text(action.log, color=color if color else self.color) + + def on_tool_end( + self, + output: str, + color: Optional[str] = None, + observation_prefix: Optional[str] = None, + llm_prefix: Optional[str] = None, + **kwargs: Any, + ) -> None: + """If not the final action, print out observation.""" + print_text(f"\n{observation_prefix}") + print_text(output, color=color if color else self.color) + print_text(f"\n{llm_prefix}") + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_text( + self, + text: str, + color: Optional[str] = None, + end: str = "", + **kwargs: Optional[str], + ) -> None: + """Run when agent ends.""" + print_text(text, color=color if color else self.color, end=end) + + def on_agent_finish( + self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any + ) -> None: + """Run on agent end.""" + print_text(finish.log, color=color if self.color else color, end="\n") diff --git a/langchain/callbacks/streaming_stdout.py b/langchain/callbacks/streaming_stdout.py new file mode 100644 index 0000000000000000000000000000000000000000..4acde4cebf075da2ee8b610b33ab5f3c8f1bc0d8 --- /dev/null +++ b/langchain/callbacks/streaming_stdout.py @@ -0,0 +1,64 @@ +"""Callback Handler streams to stdout on new llm token.""" +import sys +from typing import Any, Dict, List, Union + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class StreamingStdOutCallbackHandler(BaseCallbackHandler): + """Callback handler for streaming. 
Only works with LLMs that support streaming.""" + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run on new LLM token. Only available when streaming is enabled.""" + sys.stdout.write(token) + sys.stdout.flush() + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when LLM errors.""" + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + pass + + def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + + def on_text(self, text: str, **kwargs: Any) -> None: + """Run on arbitrary text.""" + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run on agent end.""" diff --git a/langchain/callbacks/streamlit.py b/langchain/callbacks/streamlit.py new file mode 100644 index 0000000000000000000000000000000000000000..a603765f5ce506a7e1cf3ecba37cea2f354621ad --- /dev/null +++ b/langchain/callbacks/streamlit.py @@ -0,0 +1,91 @@ +"""Callback Handler that logs to streamlit.""" +from typing import Any, Dict, List, Optional, Union + +import streamlit as st + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class StreamlitCallbackHandler(BaseCallbackHandler): + """Callback Handler that logs to streamlit.""" + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Print out the prompts.""" + st.write("Prompts after formatting:") + for prompt in prompts: + st.write(prompt) + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Do nothing.""" + pass + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Do nothing.""" + pass + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Print out that we are entering a chain.""" + class_name = serialized["name"] + st.write(f"Entering new {class_name} chain...") + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Print out that we finished a chain.""" + st.write("Finished chain.") + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_tool_start( + self, + serialized: Dict[str, Any], + input_str: str, + **kwargs: Any, + ) -> None: + """Print out the log in specified color.""" + pass + + def on_agent_action(self, 
action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + # st.write requires two spaces before a newline to render it + st.markdown(action.log.replace("\n", " \n")) + + def on_tool_end( + self, + output: str, + observation_prefix: Optional[str] = None, + llm_prefix: Optional[str] = None, + **kwargs: Any, + ) -> None: + """If not the final action, print out observation.""" + st.write(f"{observation_prefix}{output}") + st.write(llm_prefix) + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Do nothing.""" + pass + + def on_text(self, text: str, **kwargs: Any) -> None: + """Run on text.""" + # st.write requires two spaces before a newline to render it + st.write(text.replace("\n", " \n")) + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run on agent end.""" + # st.write requires two spaces before a newline to render it + st.write(finish.log.replace("\n", " \n")) diff --git a/langchain/callbacks/tracers/__init__.py b/langchain/callbacks/tracers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8db5367fdf583477f7ea45a5b17362c2656ec753 --- /dev/null +++ b/langchain/callbacks/tracers/__init__.py @@ -0,0 +1,12 @@ +"""Tracers that record execution of LangChain runs.""" + +from langchain.callbacks.tracers.base import SharedTracer, Tracer +from langchain.callbacks.tracers.langchain import BaseLangChainTracer + + +class SharedLangChainTracer(SharedTracer, BaseLangChainTracer): + """Shared tracer that records LangChain execution to LangChain endpoint.""" + + +class LangChainTracer(Tracer, BaseLangChainTracer): + """Tracer that records LangChain execution to LangChain endpoint.""" diff --git a/langchain/callbacks/tracers/__pycache__/__init__.cpython-39.pyc b/langchain/callbacks/tracers/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4c8fe27cbd10ffd4249af29f087255633e18d763 Binary files /dev/null and b/langchain/callbacks/tracers/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/callbacks/tracers/__pycache__/base.cpython-39.pyc b/langchain/callbacks/tracers/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f63c67a0eec82e6ce8abf51ef781cf9f3a8ef5ea Binary files /dev/null and b/langchain/callbacks/tracers/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/callbacks/tracers/__pycache__/langchain.cpython-39.pyc b/langchain/callbacks/tracers/__pycache__/langchain.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8302f675ec43b58c9cdb8c329c13a4c130caf646 Binary files /dev/null and b/langchain/callbacks/tracers/__pycache__/langchain.cpython-39.pyc differ diff --git a/langchain/callbacks/tracers/__pycache__/schemas.cpython-39.pyc b/langchain/callbacks/tracers/__pycache__/schemas.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd33ec6a9e71da683cf760dc9b75e5f7712ea5e0 Binary files /dev/null and b/langchain/callbacks/tracers/__pycache__/schemas.cpython-39.pyc differ diff --git a/langchain/callbacks/tracers/base.py b/langchain/callbacks/tracers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..2a99c1c824c4614ed666ab372da402c07c72778d --- /dev/null +++ b/langchain/callbacks/tracers/base.py @@ -0,0 +1,343 @@ +"""Base interfaces for tracing runs.""" +from __future__ import annotations + +import threading +from abc import ABC, abstractmethod +from dataclasses 
import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Union + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.callbacks.shared import Singleton +from langchain.callbacks.tracers.schemas import ( + ChainRun, + LLMRun, + ToolRun, + TracerSession, + TracerSessionCreate, +) +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class TracerException(Exception): + """Base class for exceptions in tracers module.""" + + +class BaseTracer(BaseCallbackHandler, ABC): + """Base interface for tracers.""" + + @abstractmethod + def _add_child_run( + self, + parent_run: Union[ChainRun, ToolRun], + child_run: Union[LLMRun, ChainRun, ToolRun], + ) -> None: + """Add child run to a chain run or tool run.""" + + @abstractmethod + def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None: + """Persist a run.""" + + @abstractmethod + def _persist_session(self, session: TracerSessionCreate) -> TracerSession: + """Persist a tracing session.""" + + @abstractmethod + def _generate_id(self) -> Optional[Union[int, str]]: + """Generate an id for a run.""" + + def new_session(self, name: Optional[str] = None, **kwargs: Any) -> TracerSession: + """NOT thread safe, do not call this method from multiple threads.""" + session_create = TracerSessionCreate(name=name, extra=kwargs) + session = self._persist_session(session_create) + self._session = session + return session + + @abstractmethod + def load_session(self, session_name: str) -> TracerSession: + """Load a tracing session and set it as the Tracer's session.""" + + @abstractmethod + def load_default_session(self) -> TracerSession: + """Load the default tracing session and set it as the Tracer's session.""" + + @property + @abstractmethod + def _stack(self) -> List[Union[LLMRun, ChainRun, ToolRun]]: + """Get the tracer stack.""" + + @property + @abstractmethod + def _execution_order(self) -> int: + """Get the execution order for a run.""" + + @_execution_order.setter + @abstractmethod + def _execution_order(self, value: int) -> None: + """Set the execution order for a run.""" + + @property + @abstractmethod + def _session(self) -> Optional[TracerSession]: + """Get the tracing session.""" + + @_session.setter + @abstractmethod + def _session(self, value: TracerSession) -> None: + """Set the tracing session.""" + + def _start_trace(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None: + """Start a trace for a run.""" + self._execution_order += 1 + + if self._stack: + if not ( + isinstance(self._stack[-1], ChainRun) + or isinstance(self._stack[-1], ToolRun) + ): + raise TracerException( + f"Nested {run.__class__.__name__} can only be" + f" logged inside a ChainRun or ToolRun" + ) + self._add_child_run(self._stack[-1], run) + self._stack.append(run) + + def _end_trace(self) -> None: + """End a trace for a run.""" + run = self._stack.pop() + if not self._stack: + self._execution_order = 1 + self._persist_run(run) + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Start a trace for an LLM run.""" + if self._session is None: + raise TracerException( + "Initialize a session with `new_session()` before starting a trace." 
+ ) + + llm_run = LLMRun( + serialized=serialized, + prompts=prompts, + extra=kwargs, + start_time=datetime.utcnow(), + execution_order=self._execution_order, + session_id=self._session.id, + id=self._generate_id(), + ) + self._start_trace(llm_run) + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Handle a new token for an LLM run.""" + pass + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """End a trace for an LLM run.""" + if not self._stack or not isinstance(self._stack[-1], LLMRun): + raise TracerException("No LLMRun found to be traced") + + self._stack[-1].end_time = datetime.utcnow() + self._stack[-1].response = response + + self._end_trace() + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Handle an error for an LLM run.""" + if not self._stack or not isinstance(self._stack[-1], LLMRun): + raise TracerException("No LLMRun found to be traced") + + self._stack[-1].error = repr(error) + self._stack[-1].end_time = datetime.utcnow() + + self._end_trace() + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Start a trace for a chain run.""" + if self._session is None: + raise TracerException( + "Initialize a session with `new_session()` before starting a trace." + ) + + chain_run = ChainRun( + serialized=serialized, + inputs=inputs, + extra=kwargs, + start_time=datetime.utcnow(), + execution_order=self._execution_order, + child_runs=[], + session_id=self._session.id, + id=self._generate_id(), + ) + self._start_trace(chain_run) + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """End a trace for a chain run.""" + if not self._stack or not isinstance(self._stack[-1], ChainRun): + raise TracerException("No ChainRun found to be traced") + + self._stack[-1].end_time = datetime.utcnow() + self._stack[-1].outputs = outputs + + self._end_trace() + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Handle an error for a chain run.""" + if not self._stack or not isinstance(self._stack[-1], ChainRun): + raise TracerException("No ChainRun found to be traced") + + self._stack[-1].end_time = datetime.utcnow() + self._stack[-1].error = repr(error) + + self._end_trace() + + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Start a trace for a tool run.""" + if self._session is None: + raise TracerException( + "Initialize a session with `new_session()` before starting a trace." + ) + + tool_run = ToolRun( + serialized=serialized, + # TODO: this is duplicate info as above, not needed. 
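+            # ToolRun.action is a required string field, so the serialized tool
+            # is stringified here even though it duplicates `serialized` above.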
+ action=str(serialized), + tool_input=input_str, + extra=kwargs, + start_time=datetime.utcnow(), + execution_order=self._execution_order, + child_runs=[], + session_id=self._session.id, + id=self._generate_id(), + ) + self._start_trace(tool_run) + + def on_tool_end(self, output: str, **kwargs: Any) -> None: + """End a trace for a tool run.""" + if not self._stack or not isinstance(self._stack[-1], ToolRun): + raise TracerException("No ToolRun found to be traced") + + self._stack[-1].end_time = datetime.utcnow() + self._stack[-1].output = output + + self._end_trace() + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Handle an error for a tool run.""" + if not self._stack or not isinstance(self._stack[-1], ToolRun): + raise TracerException("No ToolRun found to be traced") + + self._stack[-1].end_time = datetime.utcnow() + self._stack[-1].error = repr(error) + + self._end_trace() + + def on_text(self, text: str, **kwargs: Any) -> None: + """Handle a text message.""" + pass + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Handle an agent finish message.""" + pass + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Do nothing.""" + pass + + +class Tracer(BaseTracer, ABC): + """A non-thread safe implementation of the BaseTracer interface.""" + + def __init__(self) -> None: + """Initialize a tracer.""" + self._tracer_stack: List[Union[LLMRun, ChainRun, ToolRun]] = [] + self._tracer_execution_order = 1 + self._tracer_session: Optional[TracerSession] = None + + @property + def _stack(self) -> List[Union[LLMRun, ChainRun, ToolRun]]: + """Get the tracer stack.""" + return self._tracer_stack + + @property + def _execution_order(self) -> int: + """Get the execution order for a run.""" + return self._tracer_execution_order + + @_execution_order.setter + def _execution_order(self, value: int) -> None: + """Set the execution order for a run.""" + self._tracer_execution_order = value + + @property + def _session(self) -> Optional[TracerSession]: + """Get the tracing session.""" + return self._tracer_session + + @_session.setter + def _session(self, value: TracerSession) -> None: + """Set the tracing session.""" + if self._stack: + raise TracerException( + "Cannot set a session while a trace is being recorded" + ) + self._tracer_session = value + + +@dataclass +class TracerStack(threading.local): + """A stack of runs used for logging.""" + + stack: List[Union[LLMRun, ChainRun, ToolRun]] = field(default_factory=list) + execution_order: int = 1 + + +class SharedTracer(Singleton, BaseTracer, ABC): + """A thread-safe Singleton implementation of BaseTracer.""" + + _tracer_stack = TracerStack() + _tracer_session = None + + @property + def _stack(self) -> List[Union[LLMRun, ChainRun, ToolRun]]: + """Get the tracer stack.""" + return self._tracer_stack.stack + + @property + def _execution_order(self) -> int: + """Get the execution order for a run.""" + return self._tracer_stack.execution_order + + @_execution_order.setter + def _execution_order(self, value: int) -> None: + """Set the execution order for a run.""" + self._tracer_stack.execution_order = value + + @property + def _session(self) -> Optional[TracerSession]: + """Get the tracing session.""" + return self._tracer_session + + @_session.setter + def _session(self, value: TracerSession) -> None: + """Set the tracing session.""" + with self._lock: + # TODO: currently, we are only checking current thread's stack. 
+ # Need to make sure that we are not in the middle of a trace + # in any thread. + if self._stack: + raise TracerException( + "Cannot set a session while a trace is being recorded" + ) + self._tracer_session = value diff --git a/langchain/callbacks/tracers/langchain.py b/langchain/callbacks/tracers/langchain.py new file mode 100644 index 0000000000000000000000000000000000000000..d25022041aab179418c71157045270063f2c8648 --- /dev/null +++ b/langchain/callbacks/tracers/langchain.py @@ -0,0 +1,112 @@ +"""A Tracer implementation that records to LangChain endpoint.""" +from __future__ import annotations + +import logging +import os +from abc import ABC +from typing import Any, Dict, Optional, Union + +import requests + +from langchain.callbacks.tracers.base import BaseTracer +from langchain.callbacks.tracers.schemas import ( + ChainRun, + LLMRun, + ToolRun, + TracerSession, + TracerSessionCreate, +) + + +class BaseLangChainTracer(BaseTracer, ABC): + """An implementation of the SharedTracer that POSTS to the langchain endpoint.""" + + always_verbose: bool = True + _endpoint: str = os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:8000") + _headers: Dict[str, Any] = {"Content-Type": "application/json"} + if os.getenv("LANGCHAIN_API_KEY"): + _headers["x-api-key"] = os.getenv("LANGCHAIN_API_KEY") + + def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None: + """Persist a run.""" + if isinstance(run, LLMRun): + endpoint = f"{self._endpoint}/llm-runs" + elif isinstance(run, ChainRun): + endpoint = f"{self._endpoint}/chain-runs" + else: + endpoint = f"{self._endpoint}/tool-runs" + + try: + requests.post( + endpoint, + data=run.json(), + headers=self._headers, + ) + except Exception as e: + logging.warning(f"Failed to persist run: {e}") + + def _persist_session(self, session_create: TracerSessionCreate) -> TracerSession: + """Persist a session.""" + try: + r = requests.post( + f"{self._endpoint}/sessions", + data=session_create.json(), + headers=self._headers, + ) + session = TracerSession(id=r.json()["id"], **session_create.dict()) + except Exception as e: + logging.warning(f"Failed to create session, using default session: {e}") + session = TracerSession(id=1, **session_create.dict()) + return session + + def load_session(self, session_name: str) -> TracerSession: + """Load a session from the tracer.""" + try: + r = requests.get( + f"{self._endpoint}/sessions?name={session_name}", + headers=self._headers, + ) + tracer_session = TracerSession(**r.json()[0]) + self._session = tracer_session + return tracer_session + except Exception as e: + logging.warning( + f"Failed to load session {session_name}, using empty session: {e}" + ) + tracer_session = TracerSession(id=1) + self._session = tracer_session + return tracer_session + + def load_default_session(self) -> TracerSession: + """Load the default tracing session and set it as the Tracer's session.""" + try: + r = requests.get( + f"{self._endpoint}/sessions", + headers=self._headers, + ) + # Use the first session result + tracer_session = TracerSession(**r.json()[0]) + self._session = tracer_session + return tracer_session + except Exception as e: + logging.warning(f"Failed to default session, using empty session: {e}") + tracer_session = TracerSession(id=1) + self._session = tracer_session + return tracer_session + + def _add_child_run( + self, + parent_run: Union[ChainRun, ToolRun], + child_run: Union[LLMRun, ChainRun, ToolRun], + ) -> None: + """Add child run to a chain run or tool run.""" + if isinstance(child_run, LLMRun): + 
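+            # Append the child to whichever list matches its concrete run type.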
parent_run.child_llm_runs.append(child_run) + elif isinstance(child_run, ChainRun): + parent_run.child_chain_runs.append(child_run) + else: + parent_run.child_tool_runs.append(child_run) + + def _generate_id(self) -> Optional[Union[int, str]]: + """Generate an id for a run.""" + return None diff --git a/langchain/callbacks/tracers/schemas.py b/langchain/callbacks/tracers/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..bb77d747e7c8b2cd5072ffbfa5c6073a1fcf69f5 --- /dev/null +++ b/langchain/callbacks/tracers/schemas.py @@ -0,0 +1,76 @@ +"""Schemas for tracers.""" +from __future__ import annotations + +import datetime +from typing import Any, Dict, List, Optional, Union + +from pydantic import BaseModel, Field + +from langchain.schema import LLMResult + + +class TracerSessionBase(BaseModel): + """Base class for TracerSession.""" + + start_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow) + name: Optional[str] = None + extra: Optional[Dict[str, Any]] = None + + +class TracerSessionCreate(TracerSessionBase): + """Create class for TracerSession.""" + + pass + + +class TracerSession(TracerSessionBase): + """TracerSession schema.""" + + id: int + + +class BaseRun(BaseModel): + """Base class for Run.""" + + id: Optional[Union[int, str]] = None + start_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow) + end_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow) + extra: Optional[Dict[str, Any]] = None + execution_order: int + serialized: Dict[str, Any] + session_id: int + error: Optional[str] = None + + +class LLMRun(BaseRun): + """Class for LLMRun.""" + + prompts: List[str] + response: Optional[LLMResult] = None + + +class ChainRun(BaseRun): + """Class for ChainRun.""" + + inputs: Dict[str, Any] + outputs: Optional[Dict[str, Any]] = None + child_llm_runs: List[LLMRun] = Field(default_factory=list) + child_chain_runs: List[ChainRun] = Field(default_factory=list) + child_tool_runs: List[ToolRun] = Field(default_factory=list) + child_runs: List[Union[LLMRun, ChainRun, ToolRun]] = Field(default_factory=list) + + +class ToolRun(BaseRun): + """Class for ToolRun.""" + + tool_input: str + output: Optional[str] = None + action: str + child_llm_runs: List[LLMRun] = Field(default_factory=list) + child_chain_runs: List[ChainRun] = Field(default_factory=list) + child_tool_runs: List[ToolRun] = Field(default_factory=list) + child_runs: List[Union[LLMRun, ChainRun, ToolRun]] = Field(default_factory=list) + + +ChainRun.update_forward_refs() +ToolRun.update_forward_refs() diff --git a/langchain/callbacks/wandb_callback.py b/langchain/callbacks/wandb_callback.py new file mode 100644 index 0000000000000000000000000000000000000000..8527edc13aebaf6722e60dc90c68a2480ce0b874 --- /dev/null +++ b/langchain/callbacks/wandb_callback.py @@ -0,0 +1,821 @@ +import hashlib +import json +import tempfile +from copy import deepcopy +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +def import_wandb() -> Any: + try: + import wandb # noqa: F401 + except ImportError: + raise ImportError( + "To use the wandb callback manager you need to have the `wandb` python " + "package installed. 
Please install it with `pip install wandb`" + ) + return wandb + + +def import_spacy() -> Any: + try: + import spacy # noqa: F401 + except ImportError: + raise ImportError( + "To use the wandb callback manager you need to have the `spacy` python " + "package installed. Please install it with `pip install spacy`" + ) + return spacy + + +def import_pandas() -> Any: + try: + import pandas # noqa: F401 + except ImportError: + raise ImportError( + "To use the wandb callback manager you need to have the `pandas` python " + "package installed. Please install it with `pip install pandas`" + ) + return pandas + + +def import_textstat() -> Any: + try: + import textstat # noqa: F401 + except ImportError: + raise ImportError( + "To use the wandb callback manager you need to have the `textstat` python " + "package installed. Please install it with `pip install textstat`" + ) + return textstat + + +def _flatten_dict( + nested_dict: Dict[str, Any], parent_key: str = "", sep: str = "_" +) -> Iterable[Tuple[str, Any]]: + """ + Generator that yields flattened items from a nested dictionary for a flat dict. + + Parameters: + nested_dict (dict): The nested dictionary to flatten. + parent_key (str): The prefix to prepend to the keys of the flattened dict. + sep (str): The separator to use between the parent key and the key of the + flattened dictionary. + + Yields: + (str, any): A key-value pair from the flattened dictionary. + """ + for key, value in nested_dict.items(): + new_key = parent_key + sep + key if parent_key else key + if isinstance(value, dict): + yield from _flatten_dict(value, new_key, sep) + else: + yield new_key, value + + +def flatten_dict( + nested_dict: Dict[str, Any], parent_key: str = "", sep: str = "_" +) -> Dict[str, Any]: + """Flattens a nested dictionary into a flat dictionary. + + Parameters: + nested_dict (dict): The nested dictionary to flatten. + parent_key (str): The prefix to prepend to the keys of the flattened dict. + sep (str): The separator to use between the parent key and the key of the + flattened dictionary. + + Returns: + (dict): A flat dictionary. + + """ + flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)} + return flat_dict + + +def hash_string(s: str) -> str: + """Hash a string using sha1. + + Parameters: + s (str): The string to hash. + + Returns: + (str): The hashed string. + """ + return hashlib.sha1(s.encode("utf-8")).hexdigest() + + +def load_json_to_dict(json_path: Union[str, Path]) -> dict: + """Load json file to a dictionary. + + Parameters: + json_path (str): The path to the json file. + + Returns: + (dict): The dictionary representation of the json file. + """ + with open(json_path, "r") as f: + data = json.load(f) + return data + + +def analyze_text( + text: str, + complexity_metrics: bool = True, + visualize: bool = True, + nlp: Any = None, + output_dir: Optional[Union[str, Path]] = None, +) -> dict: + """Analyze text using textstat and spacy. + + Parameters: + text (str): The text to analyze. + complexity_metrics (bool): Whether to compute complexity metrics. + visualize (bool): Whether to visualize the text. + nlp (spacy.lang): The spacy language model to use for visualization. + output_dir (str): The directory to save the visualization files to. + + Returns: + (dict): A dictionary containing the complexity metrics and visualization + files serialized in a wandb.Html element. 
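+
+    Example (an illustrative sketch; assumes ``textstat``, ``spacy`` and ``wandb``
+    are installed, since all three are imported below even when only the
+    readability metrics are requested):
+        .. code-block:: python
+
+            # Compute only the readability metrics; the displacy visualizations
+            # are skipped because no spacy model or output directory is passed.
+            metrics = analyze_text("The quick brown fox jumps over the lazy dog.", visualize=False)
+            print(metrics["flesch_reading_ease"])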
+ """ + resp = {} + textstat = import_textstat() + wandb = import_wandb() + spacy = import_spacy() + if complexity_metrics: + text_complexity_metrics = { + "flesch_reading_ease": textstat.flesch_reading_ease(text), + "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text), + "smog_index": textstat.smog_index(text), + "coleman_liau_index": textstat.coleman_liau_index(text), + "automated_readability_index": textstat.automated_readability_index(text), + "dale_chall_readability_score": textstat.dale_chall_readability_score(text), + "difficult_words": textstat.difficult_words(text), + "linsear_write_formula": textstat.linsear_write_formula(text), + "gunning_fog": textstat.gunning_fog(text), + "text_standard": textstat.text_standard(text), + "fernandez_huerta": textstat.fernandez_huerta(text), + "szigriszt_pazos": textstat.szigriszt_pazos(text), + "gutierrez_polini": textstat.gutierrez_polini(text), + "crawford": textstat.crawford(text), + "gulpease_index": textstat.gulpease_index(text), + "osman": textstat.osman(text), + } + resp.update(text_complexity_metrics) + + if visualize and nlp and output_dir is not None: + doc = nlp(text) + + dep_out = spacy.displacy.render( # type: ignore + doc, style="dep", jupyter=False, page=True + ) + dep_output_path = Path(output_dir, hash_string(f"dep-{text}") + ".html") + dep_output_path.open("w", encoding="utf-8").write(dep_out) + + ent_out = spacy.displacy.render( # type: ignore + doc, style="ent", jupyter=False, page=True + ) + ent_output_path = Path(output_dir, hash_string(f"ent-{text}") + ".html") + ent_output_path.open("w", encoding="utf-8").write(ent_out) + + text_visualizations = { + "dependency_tree": wandb.Html(str(dep_output_path)), + "entities": wandb.Html(str(ent_output_path)), + } + resp.update(text_visualizations) + + return resp + + +def construct_html_from_prompt_and_generation(prompt: str, generation: str) -> Any: + """Construct an html element from a prompt and a generation. + + Parameters: + prompt (str): The prompt. + generation (str): The generation. + + Returns: + (wandb.Html): The html element.""" + wandb = import_wandb() + formatted_prompt = prompt.replace("\n", "
") + formatted_generation = generation.replace("\n", "
") + + return wandb.Html( + f""" +

{formatted_prompt}:

+
+

+ {formatted_generation} +

+
+ """, + inject=False, + ) + + +class BaseMetadataCallbackHandler: + """This class handles the metadata and associated function states for callbacks. + + Attributes: + step (int): The current step. + starts (int): The number of times the start method has been called. + ends (int): The number of times the end method has been called. + errors (int): The number of times the error method has been called. + text_ctr (int): The number of times the text method has been called. + ignore_llm_ (bool): Whether to ignore llm callbacks. + ignore_chain_ (bool): Whether to ignore chain callbacks. + ignore_agent_ (bool): Whether to ignore agent callbacks. + always_verbose_ (bool): Whether to always be verbose. + chain_starts (int): The number of times the chain start method has been called. + chain_ends (int): The number of times the chain end method has been called. + llm_starts (int): The number of times the llm start method has been called. + llm_ends (int): The number of times the llm end method has been called. + llm_streams (int): The number of times the text method has been called. + tool_starts (int): The number of times the tool start method has been called. + tool_ends (int): The number of times the tool end method has been called. + agent_ends (int): The number of times the agent end method has been called. + on_llm_start_records (list): A list of records of the on_llm_start method. + on_llm_token_records (list): A list of records of the on_llm_token method. + on_llm_end_records (list): A list of records of the on_llm_end method. + on_chain_start_records (list): A list of records of the on_chain_start method. + on_chain_end_records (list): A list of records of the on_chain_end method. + on_tool_start_records (list): A list of records of the on_tool_start method. + on_tool_end_records (list): A list of records of the on_tool_end method. + on_agent_finish_records (list): A list of records of the on_agent_end method. 
+ """ + + def __init__(self) -> None: + self.step = 0 + + self.starts = 0 + self.ends = 0 + self.errors = 0 + self.text_ctr = 0 + + self.ignore_llm_ = False + self.ignore_chain_ = False + self.ignore_agent_ = False + self.always_verbose_ = False + + self.chain_starts = 0 + self.chain_ends = 0 + + self.llm_starts = 0 + self.llm_ends = 0 + self.llm_streams = 0 + + self.tool_starts = 0 + self.tool_ends = 0 + + self.agent_ends = 0 + + self.on_llm_start_records: list = [] + self.on_llm_token_records: list = [] + self.on_llm_end_records: list = [] + + self.on_chain_start_records: list = [] + self.on_chain_end_records: list = [] + + self.on_tool_start_records: list = [] + self.on_tool_end_records: list = [] + + self.on_text_records: list = [] + self.on_agent_finish_records: list = [] + self.on_agent_action_records: list = [] + + @property + def always_verbose(self) -> bool: + """Whether to call verbose callbacks even if verbose is False.""" + return self.always_verbose_ + + @property + def ignore_llm(self) -> bool: + """Whether to ignore LLM callbacks.""" + return self.ignore_llm_ + + @property + def ignore_chain(self) -> bool: + """Whether to ignore chain callbacks.""" + return self.ignore_chain_ + + @property + def ignore_agent(self) -> bool: + """Whether to ignore agent callbacks.""" + return self.ignore_agent_ + + def get_custom_callback_meta(self) -> Dict[str, Any]: + return { + "step": self.step, + "starts": self.starts, + "ends": self.ends, + "errors": self.errors, + "text_ctr": self.text_ctr, + "chain_starts": self.chain_starts, + "chain_ends": self.chain_ends, + "llm_starts": self.llm_starts, + "llm_ends": self.llm_ends, + "llm_streams": self.llm_streams, + "tool_starts": self.tool_starts, + "tool_ends": self.tool_ends, + "agent_ends": self.agent_ends, + } + + def reset_callback_meta(self) -> None: + """Reset the callback metadata.""" + self.step = 0 + + self.starts = 0 + self.ends = 0 + self.errors = 0 + self.text_ctr = 0 + + self.ignore_llm_ = False + self.ignore_chain_ = False + self.ignore_agent_ = False + self.always_verbose_ = False + + self.chain_starts = 0 + self.chain_ends = 0 + + self.llm_starts = 0 + self.llm_ends = 0 + self.llm_streams = 0 + + self.tool_starts = 0 + self.tool_ends = 0 + + self.agent_ends = 0 + + self.on_llm_start_records = [] + self.on_llm_token_records = [] + self.on_llm_end_records = [] + + self.on_chain_start_records = [] + self.on_chain_end_records = [] + + self.on_tool_start_records = [] + self.on_tool_end_records = [] + + self.on_text_records = [] + self.on_agent_finish_records = [] + self.on_agent_action_records = [] + return None + + +class WandbCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler): + """Callback Handler that logs to Weights and Biases. + + Parameters: + job_type (str): The type of job. + project (str): The project to log to. + entity (str): The entity to log to. + tags (list): The tags to log. + group (str): The group to log to. + name (str): The name of the run. + notes (str): The notes to log. + visualize (bool): Whether to visualize the run. + complexity_metrics (bool): Whether to log complexity metrics. + stream_logs (bool): Whether to stream callback actions to W&B + + This handler will utilize the associated callback method called and formats + the input of each callback function with metadata regarding the state of LLM run, + and adds the response to the list of records for both the {method}_records and + action. It then logs the response using the run.log() method to Weights and Biases. 
+ """ + + def __init__( + self, + job_type: Optional[str] = None, + project: Optional[str] = "langchain_callback_demo", + entity: Optional[str] = None, + tags: Optional[Sequence] = None, + group: Optional[str] = None, + name: Optional[str] = None, + notes: Optional[str] = None, + visualize: bool = False, + complexity_metrics: bool = False, + stream_logs: bool = False, + ) -> None: + """Initialize callback handler.""" + + wandb = import_wandb() + import_pandas() + import_textstat() + spacy = import_spacy() + super().__init__() + + self.job_type = job_type + self.project = project + self.entity = entity + self.tags = tags + self.group = group + self.name = name + self.notes = notes + self.visualize = visualize + self.complexity_metrics = complexity_metrics + self.stream_logs = stream_logs + + self.temp_dir = tempfile.TemporaryDirectory() + self.run: wandb.sdk.wandb_run.Run = wandb.init( # type: ignore + job_type=self.job_type, + project=self.project, + entity=self.entity, + tags=self.tags, + group=self.group, + name=self.name, + notes=self.notes, + ) + warning = ( + "The wandb callback is currently in beta and is subject to change " + "based on updates to `langchain`. Please report any issues to " + "https://github.com/wandb/wandb/issues with the tag `langchain`." + ) + wandb.termwarn( + warning, + repeat=False, + ) + self.callback_columns: list = [] + self.action_records: list = [] + self.complexity_metrics = complexity_metrics + self.visualize = visualize + self.nlp = spacy.load("en_core_web_sm") + + def _init_resp(self) -> Dict: + return {k: None for k in self.callback_columns} + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts.""" + self.step += 1 + self.llm_starts += 1 + self.starts += 1 + + resp = self._init_resp() + resp.update({"action": "on_llm_start"}) + resp.update(flatten_dict(serialized)) + resp.update(self.get_custom_callback_meta()) + + for prompt in prompts: + prompt_resp = deepcopy(resp) + prompt_resp["prompts"] = prompt + self.on_llm_start_records.append(prompt_resp) + self.action_records.append(prompt_resp) + if self.stream_logs: + self.run.log(prompt_resp) + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run when LLM generates a new token.""" + self.step += 1 + self.llm_streams += 1 + + resp = self._init_resp() + resp.update({"action": "on_llm_new_token", "token": token}) + resp.update(self.get_custom_callback_meta()) + + self.on_llm_token_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + self.step += 1 + self.llm_ends += 1 + self.ends += 1 + + resp = self._init_resp() + resp.update({"action": "on_llm_end"}) + resp.update(flatten_dict(response.llm_output or {})) + resp.update(self.get_custom_callback_meta()) + + for generations in response.generations: + for generation in generations: + generation_resp = deepcopy(resp) + generation_resp.update(flatten_dict(generation.dict())) + generation_resp.update( + analyze_text( + generation.text, + complexity_metrics=self.complexity_metrics, + visualize=self.visualize, + nlp=self.nlp, + output_dir=self.temp_dir.name, + ) + ) + self.on_llm_end_records.append(generation_resp) + self.action_records.append(generation_resp) + if self.stream_logs: + self.run.log(generation_resp) + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + 
"""Run when LLM errors.""" + self.step += 1 + self.errors += 1 + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + self.step += 1 + self.chain_starts += 1 + self.starts += 1 + + resp = self._init_resp() + resp.update({"action": "on_chain_start"}) + resp.update(flatten_dict(serialized)) + resp.update(self.get_custom_callback_meta()) + + chain_input = inputs["input"] + + if isinstance(chain_input, str): + input_resp = deepcopy(resp) + input_resp["input"] = chain_input + self.on_chain_start_records.append(input_resp) + self.action_records.append(input_resp) + if self.stream_logs: + self.run.log(input_resp) + elif isinstance(chain_input, list): + for inp in chain_input: + input_resp = deepcopy(resp) + input_resp.update(inp) + self.on_chain_start_records.append(input_resp) + self.action_records.append(input_resp) + if self.stream_logs: + self.run.log(input_resp) + else: + raise ValueError("Unexpected data format provided!") + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + self.step += 1 + self.chain_ends += 1 + self.ends += 1 + + resp = self._init_resp() + resp.update({"action": "on_chain_end", "outputs": outputs["output"]}) + resp.update(self.get_custom_callback_meta()) + + self.on_chain_end_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + self.step += 1 + self.errors += 1 + + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + self.step += 1 + self.tool_starts += 1 + self.starts += 1 + + resp = self._init_resp() + resp.update({"action": "on_tool_start", "input_str": input_str}) + resp.update(flatten_dict(serialized)) + resp.update(self.get_custom_callback_meta()) + + self.on_tool_start_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + self.step += 1 + self.tool_ends += 1 + self.ends += 1 + + resp = self._init_resp() + resp.update({"action": "on_tool_end", "output": output}) + resp.update(self.get_custom_callback_meta()) + + self.on_tool_end_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + self.step += 1 + self.errors += 1 + + def on_text(self, text: str, **kwargs: Any) -> None: + """ + Run when agent is ending. 
+ """ + self.step += 1 + self.text_ctr += 1 + + resp = self._init_resp() + resp.update({"action": "on_text", "text": text}) + resp.update(self.get_custom_callback_meta()) + + self.on_text_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run when agent ends running.""" + self.step += 1 + self.agent_ends += 1 + self.ends += 1 + + resp = self._init_resp() + resp.update( + { + "action": "on_agent_finish", + "output": finish.return_values["output"], + "log": finish.log, + } + ) + resp.update(self.get_custom_callback_meta()) + + self.on_agent_finish_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + self.step += 1 + self.tool_starts += 1 + self.starts += 1 + + resp = self._init_resp() + resp.update( + { + "action": "on_agent_action", + "tool": action.tool, + "tool_input": action.tool_input, + "log": action.log, + } + ) + resp.update(self.get_custom_callback_meta()) + self.on_agent_action_records.append(resp) + self.action_records.append(resp) + if self.stream_logs: + self.run.log(resp) + + def _create_session_analysis_df(self) -> Any: + """Create a dataframe with all the information from the session.""" + pd = import_pandas() + on_llm_start_records_df = pd.DataFrame(self.on_llm_start_records) + on_llm_end_records_df = pd.DataFrame(self.on_llm_end_records) + selected_columns = ["step", "prompts", "name"] + on_llm_start_records_selected_df = on_llm_start_records_df[selected_columns] + llm_input_prompts_df = ( + on_llm_start_records_selected_df + .dropna(axis=1) + .rename({"step": "prompt_step"}, axis=1) + ) + complexity_metrics_columns = [] + visualizations_columns = [] + + if self.complexity_metrics: + complexity_metrics_columns = [ + "flesch_reading_ease", + "flesch_kincaid_grade", + "smog_index", + "coleman_liau_index", + "automated_readability_index", + "dale_chall_readability_score", + "difficult_words", + "linsear_write_formula", + "gunning_fog", + "text_standard", + "fernandez_huerta", + "szigriszt_pazos", + "gutierrez_polini", + "crawford", + "gulpease_index", + "osman", + ] + + if self.visualize: + visualizations_columns = ["dependency_tree", "entities"] + + llm_outputs_df = ( + on_llm_end_records_df[ + [ + "step", + "text", + "token_usage_total_tokens", + "token_usage_prompt_tokens", + "token_usage_completion_tokens", + ] + + complexity_metrics_columns + + visualizations_columns + ] + .dropna(axis=1) + .rename({"step": "output_step", "text": "output"}, axis=1) + ) + session_analysis_df = pd.concat([llm_input_prompts_df, llm_outputs_df], axis=1) + session_analysis_df["chat_html"] = session_analysis_df[ + ["prompts", "output"] + ].apply( + lambda row: construct_html_from_prompt_and_generation( + row["prompts"], row["output"] + ), + axis=1, + ) + return session_analysis_df + + def flush_tracker( + self, + langchain_asset: Any = None, + reset: bool = True, + finish: bool = False, + job_type: Optional[str] = None, + project: Optional[str] = None, + entity: Optional[str] = None, + tags: Optional[Sequence] = None, + group: Optional[str] = None, + name: Optional[str] = None, + notes: Optional[str] = None, + visualize: Optional[bool] = None, + complexity_metrics: Optional[bool] = None, + ) -> None: + """Flush the tracker and reset the session. + + Args: + langchain_asset: The langchain asset to save. 
+ reset: Whether to reset the session. + finish: Whether to finish the run. + job_type: The job type. + project: The project. + entity: The entity. + tags: The tags. + group: The group. + name: The name. + notes: The notes. + visualize: Whether to visualize. + complexity_metrics: Whether to compute complexity metrics. + + Returns: + None + """ + pd = import_pandas() + wandb = import_wandb() + action_records_table = wandb.Table(dataframe=pd.DataFrame(self.action_records)) + session_analysis_dataframe = self._create_session_analysis_df() + session_analysis_table = wandb.Table( + dataframe=session_analysis_dataframe + ) + self.run.log( + { + "action_records": action_records_table, + "session_analysis": session_analysis_table, + } + ) + + if langchain_asset: + langchain_asset_path = Path(self.temp_dir.name, "model.json") + model_artifact = wandb.Artifact(name="model", type="model") + model_artifact.add(action_records_table, name="action_records") + model_artifact.add(session_analysis_table, name="session_analysis") + try: + langchain_asset.save_agent(langchain_asset_path) + model_artifact.add_file(str(langchain_asset_path)) + model_artifact.metadata = load_json_to_dict(langchain_asset_path) + except ValueError: + langchain_asset.save_agent(langchain_asset_path) + model_artifact.add_file(str(langchain_asset_path)) + model_artifact.metadata = load_json_to_dict(langchain_asset_path) + except NotImplementedError as e: + print("Could not save model.") + print(repr(e)) + pass + self.run.log_artifact(model_artifact) + + if finish or reset: + self.run.finish() + self.temp_dir.cleanup() + self.reset_callback_meta() + if reset: + self.__init__( # type: ignore + job_type=job_type if job_type else self.job_type, + project=project if project else self.project, + entity=entity if entity else self.entity, + tags=tags if tags else self.tags, + group=group if group else self.group, + name=name if name else self.name, + notes=notes if notes else self.notes, + visualize=visualize if visualize else self.visualize, + complexity_metrics=complexity_metrics + if complexity_metrics + else self.complexity_metrics, + ) diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0307fcf3afaec67b3ad95a0638379972a71d151b --- /dev/null +++ b/langchain/chains/__init__.py @@ -0,0 +1,64 @@ +"""Chains are easily reusable components which can be linked together.""" +from langchain.chains.api.base import APIChain +from langchain.chains.combine_documents.base import AnalyzeDocumentChain +from langchain.chains.constitutional_ai.base import ConstitutionalChain +from langchain.chains.conversation.base import ConversationChain +from langchain.chains.conversational_retrieval.base import ( + ChatVectorDBChain, + ConversationalRetrievalChain, +) +from langchain.chains.graph_qa.base import GraphQAChain +from langchain.chains.hyde.base import HypotheticalDocumentEmbedder +from langchain.chains.llm import LLMChain +from langchain.chains.llm_bash.base import LLMBashChain +from langchain.chains.llm_checker.base import LLMCheckerChain +from langchain.chains.llm_math.base import LLMMathChain +from langchain.chains.llm_requests import LLMRequestsChain +from langchain.chains.llm_summarization_checker.base import LLMSummarizationCheckerChain +from langchain.chains.loading import load_chain +from langchain.chains.mapreduce import MapReduceChain +from langchain.chains.moderation import OpenAIModerationChain +from langchain.chains.pal.base import PALChain +from 
langchain.chains.qa_generation.base import QAGenerationChain +from langchain.chains.qa_with_sources.base import QAWithSourcesChain +from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain +from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain +from langchain.chains.retrieval_qa.base import RetrievalQA, VectorDBQA +from langchain.chains.sequential import SequentialChain, SimpleSequentialChain +from langchain.chains.sql_database.base import ( + SQLDatabaseChain, + SQLDatabaseSequentialChain, +) +from langchain.chains.transform import TransformChain + +__all__ = [ + "ConversationChain", + "LLMChain", + "LLMBashChain", + "LLMCheckerChain", + "LLMSummarizationCheckerChain", + "LLMMathChain", + "PALChain", + "QAWithSourcesChain", + "SQLDatabaseChain", + "SequentialChain", + "SimpleSequentialChain", + "VectorDBQA", + "VectorDBQAWithSourcesChain", + "APIChain", + "LLMRequestsChain", + "TransformChain", + "MapReduceChain", + "OpenAIModerationChain", + "SQLDatabaseSequentialChain", + "load_chain", + "AnalyzeDocumentChain", + "HypotheticalDocumentEmbedder", + "ChatVectorDBChain", + "GraphQAChain", + "ConstitutionalChain", + "QAGenerationChain", + "RetrievalQA", + "RetrievalQAWithSourcesChain", + "ConversationalRetrievalChain", +] diff --git a/langchain/chains/__pycache__/__init__.cpython-39.pyc b/langchain/chains/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db06741e0038293fd37f5a1d9eb3e2fda8ad9ee4 Binary files /dev/null and b/langchain/chains/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/base.cpython-39.pyc b/langchain/chains/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53b4f0a0382c834f449cc99c34400138988068bc Binary files /dev/null and b/langchain/chains/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/llm.cpython-39.pyc b/langchain/chains/__pycache__/llm.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92974127fc274b20155cbee7acbff12e214c0463 Binary files /dev/null and b/langchain/chains/__pycache__/llm.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/llm_requests.cpython-39.pyc b/langchain/chains/__pycache__/llm_requests.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1bc315d91843be0417fb4a62958a197ac5a0913 Binary files /dev/null and b/langchain/chains/__pycache__/llm_requests.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/loading.cpython-39.pyc b/langchain/chains/__pycache__/loading.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60956ca38acbe41848b3ce1ea2b6fabb51a94851 Binary files /dev/null and b/langchain/chains/__pycache__/loading.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/mapreduce.cpython-39.pyc b/langchain/chains/__pycache__/mapreduce.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..142c63ad894a84e2fd164edb40a0a0f52bd5fedb Binary files /dev/null and b/langchain/chains/__pycache__/mapreduce.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/moderation.cpython-39.pyc b/langchain/chains/__pycache__/moderation.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05e56ed303172273b6694be59409c3273992f16a Binary files /dev/null and b/langchain/chains/__pycache__/moderation.cpython-39.pyc differ diff --git 
a/langchain/chains/__pycache__/prompt_selector.cpython-39.pyc b/langchain/chains/__pycache__/prompt_selector.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..051110e5a0d9d28b8887dfdd8e0940b5af44e611 Binary files /dev/null and b/langchain/chains/__pycache__/prompt_selector.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/sequential.cpython-39.pyc b/langchain/chains/__pycache__/sequential.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..791c163877f2dc4ef23bdf36576b6c8af7208693 Binary files /dev/null and b/langchain/chains/__pycache__/sequential.cpython-39.pyc differ diff --git a/langchain/chains/__pycache__/transform.cpython-39.pyc b/langchain/chains/__pycache__/transform.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cfbb81c1c2476361e1ae286ac63d08ec69fd7b7 Binary files /dev/null and b/langchain/chains/__pycache__/transform.cpython-39.pyc differ diff --git a/langchain/chains/api/__init__.py b/langchain/chains/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..efe2fb36ba9a35646b59caaa6fa56f1255a7e031 --- /dev/null +++ b/langchain/chains/api/__init__.py @@ -0,0 +1 @@ +"""Chain that makes API calls and summarizes the responses to answer a question.""" diff --git a/langchain/chains/api/__pycache__/__init__.cpython-39.pyc b/langchain/chains/api/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f2992c90ebe0e94a7dc148bda4173c1c168fd11 Binary files /dev/null and b/langchain/chains/api/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/base.cpython-39.pyc b/langchain/chains/api/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e689736a16dfebd8548d41e5af6ee883f15204a2 Binary files /dev/null and b/langchain/chains/api/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/news_docs.cpython-39.pyc b/langchain/chains/api/__pycache__/news_docs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1be9cd68e36b3f4d2e4f86d6d34841c357e299c8 Binary files /dev/null and b/langchain/chains/api/__pycache__/news_docs.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/open_meteo_docs.cpython-39.pyc b/langchain/chains/api/__pycache__/open_meteo_docs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9df409aad766622164ac9989e9babe8f5fe8486a Binary files /dev/null and b/langchain/chains/api/__pycache__/open_meteo_docs.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/podcast_docs.cpython-39.pyc b/langchain/chains/api/__pycache__/podcast_docs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4b7185ca43abdf6f6d78a87d265594ad3ede378 Binary files /dev/null and b/langchain/chains/api/__pycache__/podcast_docs.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/prompt.cpython-39.pyc b/langchain/chains/api/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd043fe2911c662729eb1a978aa77b3449517b44 Binary files /dev/null and b/langchain/chains/api/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/api/__pycache__/tmdb_docs.cpython-39.pyc b/langchain/chains/api/__pycache__/tmdb_docs.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..38eff54364bed59d066c46ffa3ff8c73b63c6928 Binary files /dev/null and b/langchain/chains/api/__pycache__/tmdb_docs.cpython-39.pyc differ diff --git a/langchain/chains/api/base.py b/langchain/chains/api/base.py new file mode 100644 index 0000000000000000000000000000000000000000..5cbded4e0b115cdc3fc535d7144ab2ad113cf285 --- /dev/null +++ b/langchain/chains/api/base.py @@ -0,0 +1,108 @@ +"""Chain that makes API calls and summarizes the responses to answer a question.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field, root_validator + +from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.prompts import BasePromptTemplate +from langchain.requests import RequestsWrapper +from langchain.schema import BaseLanguageModel + + +class APIChain(Chain, BaseModel): + """Chain that makes API calls and summarizes the responses to answer a question.""" + + api_request_chain: LLMChain + api_answer_chain: LLMChain + requests_wrapper: RequestsWrapper = Field(exclude=True) + api_docs: str + question_key: str = "question" #: :meta private: + output_key: str = "output" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.question_key] + + @property + def output_keys(self) -> List[str]: + """Expect output key. + + :meta private: + """ + return [self.output_key] + + @root_validator(pre=True) + def validate_api_request_prompt(cls, values: Dict) -> Dict: + """Check that api request prompt expects the right variables.""" + input_vars = values["api_request_chain"].prompt.input_variables + expected_vars = {"question", "api_docs"} + if set(input_vars) != expected_vars: + raise ValueError( + f"Input variables should be {expected_vars}, got {input_vars}" + ) + return values + + @root_validator(pre=True) + def validate_api_answer_prompt(cls, values: Dict) -> Dict: + """Check that api answer prompt expects the right variables.""" + input_vars = values["api_answer_chain"].prompt.input_variables + expected_vars = {"question", "api_docs", "api_url", "api_response"} + if set(input_vars) != expected_vars: + raise ValueError( + f"Input variables should be {expected_vars}, got {input_vars}" + ) + return values + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + question = inputs[self.question_key] + api_url = self.api_request_chain.predict( + question=question, api_docs=self.api_docs + ) + self.callback_manager.on_text( + api_url, color="green", end="\n", verbose=self.verbose + ) + api_response = self.requests_wrapper.get(api_url) + self.callback_manager.on_text( + api_response, color="yellow", end="\n", verbose=self.verbose + ) + answer = self.api_answer_chain.predict( + question=question, + api_docs=self.api_docs, + api_url=api_url, + api_response=api_response, + ) + return {self.output_key: answer} + + @classmethod + def from_llm_and_api_docs( + cls, + llm: BaseLanguageModel, + api_docs: str, + headers: Optional[dict] = None, + api_url_prompt: BasePromptTemplate = API_URL_PROMPT, + api_response_prompt: BasePromptTemplate = API_RESPONSE_PROMPT, + **kwargs: Any, + ) -> APIChain: + """Load chain from just an LLM and the api docs.""" + get_request_chain = LLMChain(llm=llm, prompt=api_url_prompt) + requests_wrapper = RequestsWrapper(headers=headers) + get_answer_chain = LLMChain(llm=llm, 
prompt=api_response_prompt) + return cls( + api_request_chain=get_request_chain, + api_answer_chain=get_answer_chain, + requests_wrapper=requests_wrapper, + api_docs=api_docs, + **kwargs, + ) + + @property + def _chain_type(self) -> str: + return "api_chain" diff --git a/langchain/chains/api/news_docs.py b/langchain/chains/api/news_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..7e84c1da4d628dbf757a6e9ac332ba00d24e1bfe --- /dev/null +++ b/langchain/chains/api/news_docs.py @@ -0,0 +1,32 @@ +# flake8: noqa +NEWS_DOCS = """API documentation: +Endpoint: https://newsapi.org +Top headlines /v2/top-headlines + +This endpoint provides live top and breaking headlines for a country, specific category in a country, single source, or multiple sources. You can also search with keywords. Articles are sorted by the earliest date published first. + +This endpoint is great for retrieving headlines for use with news tickers or similar. +Request parameters + + country | The 2-letter ISO 3166-1 code of the country you want to get headlines for. Possible options: ae ar at au be bg br ca ch cn co cu cz de eg fr gb gr hk hu id ie il in it jp kr lt lv ma mx my ng nl no nz ph pl pt ro rs ru sa se sg si sk th tr tw ua us ve za. Note: you can't mix this param with the sources param. + category | The category you want to get headlines for. Possible options: business entertainment general health science sports technology. Note: you can't mix this param with the sources param. + sources | A comma-seperated string of identifiers for the news sources or blogs you want headlines from. Use the /top-headlines/sources endpoint to locate these programmatically or look at the sources index. Note: you can't mix this param with the country or category params. + q | Keywords or a phrase to search for. + pageSize | int | The number of results to return per page (request). 20 is the default, 100 is the maximum. + page | int | Use this to page through the results if the total results found is greater than the page size. + +Response object + status | string | If the request was successful or not. Options: ok, error. In the case of error a code and message property will be populated. + totalResults | int | The total number of results available for your request. + articles | array[article] | The results of the request. + source | object | The identifier id and a display name name for the source this article came from. + author | string | The author of the article + title | string | The headline or title of the article. + description | string | A description or snippet from the article. + url | string | The direct URL to the article. + urlToImage | string | The URL to a relevant image for the article. + publishedAt | string | The date and time that the article was published, in UTC (+000) + content | string | The unformatted content of the article, where available. This is truncated to 200 chars. + +Use page size: 2 +""" diff --git a/langchain/chains/api/open_meteo_docs.py b/langchain/chains/api/open_meteo_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..4abd86fb83a5a038593cb13855347194e59f2bab --- /dev/null +++ b/langchain/chains/api/open_meteo_docs.py @@ -0,0 +1,33 @@ +# flake8: noqa +OPEN_METEO_DOCS = """BASE URL: https://api.open-meteo.com/ + +API Documentation +The API endpoint /v1/forecast accepts a geographical coordinate, a list of weather variables and responds with a JSON hourly weather forecast for 7 days. Time always starts at 0:00 today and contains 168 hours. 
All URL parameters are listed below: + +Parameter Format Required Default Description +latitude, longitude Floating point Yes Geographical WGS84 coordinate of the location +hourly String array No A list of weather variables which should be returned. Values can be comma separated, or multiple &hourly= parameter in the URL can be used. +daily String array No A list of daily weather variable aggregations which should be returned. Values can be comma separated, or multiple &daily= parameter in the URL can be used. If daily weather variables are specified, parameter timezone is required. +current_weather Bool No false Include current weather conditions in the JSON output. +temperature_unit String No celsius If fahrenheit is set, all temperature values are converted to Fahrenheit. +windspeed_unit String No kmh Other wind speed speed units: ms, mph and kn +precipitation_unit String No mm Other precipitation amount units: inch +timeformat String No iso8601 If format unixtime is selected, all time values are returned in UNIX epoch time in seconds. Please note that all timestamp are in GMT+0! For daily values with unix timestamps, please apply utc_offset_seconds again to get the correct date. +timezone String No GMT If timezone is set, all timestamps are returned as local-time and data is returned starting at 00:00 local-time. Any time zone name from the time zone database is supported. If auto is set as a time zone, the coordinates will be automatically resolved to the local time zone. +past_days Integer (0-2) No 0 If past_days is set, yesterday or the day before yesterday data are also returned. +start_date +end_date String (yyyy-mm-dd) No The time interval to get weather data. A day must be specified as an ISO8601 date (e.g. 2022-06-30). +models String array No auto Manually select one or more weather models. Per default, the best suitable weather models will be combined. + +Hourly Parameter Definition +The parameter &hourly= accepts the following values. Most weather variables are given as an instantaneous value for the indicated hour. Some variables like precipitation are calculated from the preceding hour as an average or sum. + +Variable Valid time Unit Description +temperature_2m Instant °C (°F) Air temperature at 2 meters above ground +snowfall Preceding hour sum cm (inch) Snowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent +rain Preceding hour sum mm (inch) Rain from large scale weather systems of the preceding hour in millimeter +showers Preceding hour sum mm (inch) Showers from convective precipitation in millimeters from the preceding hour +weathercode Instant WMO code Weather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details. +snow_depth Instant meters Snow depth on the ground +freezinglevel_height Instant meters Altitude above sea level of the 0°C level +visibility Instant meters Viewing distance in meters. Influenced by low clouds, humidity and aerosols. 
Maximum visibility is approximately 24 km.""" diff --git a/langchain/chains/api/podcast_docs.py b/langchain/chains/api/podcast_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..9c4e5cbf827ff4264089a2b361bff1439d2df631 --- /dev/null +++ b/langchain/chains/api/podcast_docs.py @@ -0,0 +1,28 @@ +# flake8: noqa +PODCAST_DOCS = """API documentation: +Endpoint: https://listen-api.listennotes.com/api/v2 +GET /search + +This API is for searching podcasts or episodes. + +Query parameters table: +q | string | Search term, e.g., person, place, topic... You can use double quotes to do verbatim match, e.g., "game of thrones". Otherwise, it's fuzzy search. | required +type | string | What type of contents do you want to search for? Available values: episode, podcast, curated. default: episode | optional +page_size | integer | The maximum number of search results per page. A valid value should be an integer between 1 and 10 (inclusive). default: 3 | optional +language | string | Limit search results to a specific language, e.g., English, Chinese ... If not specified, it'll be any language. It works only when type is episode or podcast. | optional +region | string | Limit search results to a specific region (e.g., us, gb, in...). If not specified, it'll be any region. It works only when type is episode or podcast. | optional +len_min | integer | Minimum audio length in minutes. Applicable only when type parameter is episode or podcast. If type parameter is episode, it's for audio length of an episode. If type parameter is podcast, it's for average audio length of all episodes in a podcast. | optional +len_max | integer | Maximum audio length in minutes. Applicable only when type parameter is episode or podcast. If type parameter is episode, it's for audio length of an episode. If type parameter is podcast, it's for average audio length of all episodes in a podcast. | optional + +Response schema (JSON object): +next_offset | integer | optional +total | integer | optional +results | array[object] (Episode / Podcast List Result Object) + +Each object in the "results" key has the following schema: +listennotes_url | string | optional +id | integer | optional +title_highlighted | string | optional + +Use page_size: 3 +""" diff --git a/langchain/chains/api/prompt.py b/langchain/chains/api/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..020ac8d1b4cb804b89953fa7a3962079963c161c --- /dev/null +++ b/langchain/chains/api/prompt.py @@ -0,0 +1,36 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +API_URL_PROMPT_TEMPLATE = """You are given the below API Documentation: +{api_docs} +Using this documentation, generate the full API url to call for answering the user question. +You should build the API url in order to get a response that is as short as possible, while still getting the necessary information to answer the question. Pay attention to deliberately exclude any unnecessary pieces of data in the API call. + +Question:{question} +API url:""" + +API_URL_PROMPT = PromptTemplate( + input_variables=[ + "api_docs", + "question", + ], + template=API_URL_PROMPT_TEMPLATE, +) + +API_RESPONSE_PROMPT_TEMPLATE = ( + API_URL_PROMPT_TEMPLATE + + """ {api_url} + +Here is the response from the API: + +{api_response} + +Summarize this response to answer the original question. 
+ +Summary:""" +) + +API_RESPONSE_PROMPT = PromptTemplate( + input_variables=["api_docs", "question", "api_url", "api_response"], + template=API_RESPONSE_PROMPT_TEMPLATE, +) diff --git a/langchain/chains/api/tmdb_docs.py b/langchain/chains/api/tmdb_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..20596f0cd296e20bc9b78157c10a8732ed3d27b6 --- /dev/null +++ b/langchain/chains/api/tmdb_docs.py @@ -0,0 +1,37 @@ +# flake8: noqa +TMDB_DOCS = """API documentation: +Endpoint: https://api.themoviedb.org/3 +GET /search/movie + +This API is for searching movies. + +Query parameters table: +language | string | Pass a ISO 639-1 value to display translated data for the fields that support it. minLength: 2, pattern: ([a-z]{2})-([A-Z]{2}), default: en-US | optional +query | string | Pass a text query to search. This value should be URI encoded. minLength: 1 | required +page | integer | Specify which page to query. minimum: 1, maximum: 1000, default: 1 | optional +include_adult | boolean | Choose whether to inlcude adult (pornography) content in the results. default | optional +region | string | Specify a ISO 3166-1 code to filter release dates. Must be uppercase. pattern: ^[A-Z]{2}$ | optional +year | integer | optional +primary_release_year | integer | optional + +Response schema (JSON object): +page | integer | optional +total_results | integer | optional +total_pages | integer | optional +results | array[object] (Movie List Result Object) + +Each object in the "results" key has the following schema: +poster_path | string or null | optional +adult | boolean | optional +overview | string | optional +release_date | string | optional +genre_ids | array[integer] | optional +id | integer | optional +original_title | string | optional +original_language | string | optional +title | string | optional +backdrop_path | string or null | optional +popularity | number | optional +vote_count | integer | optional +video | boolean | optional +vote_average | number | optional""" diff --git a/langchain/chains/base.py b/langchain/chains/base.py new file mode 100644 index 0000000000000000000000000000000000000000..a89c5c4c3ab027d77232816df92ace638a4e9fe2 --- /dev/null +++ b/langchain/chains/base.py @@ -0,0 +1,282 @@ +"""Base interface that all chains should implement.""" +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + +import yaml +from pydantic import BaseModel, Field, validator + +import langchain +from langchain.callbacks import get_callback_manager +from langchain.callbacks.base import BaseCallbackManager +from langchain.schema import BaseMemory + + +def _get_verbosity() -> bool: + return langchain.verbose + + +class Chain(BaseModel, ABC): + """Base interface that all chains should implement.""" + + memory: Optional[BaseMemory] = None + callback_manager: BaseCallbackManager = Field( + default_factory=get_callback_manager, exclude=True + ) + verbose: bool = Field( + default_factory=_get_verbosity + ) # Whether to print the response text + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + @property + def _chain_type(self) -> str: + raise NotImplementedError("Saving not supported for this chain type.") + + @validator("callback_manager", pre=True, always=True) + def set_callback_manager( + cls, callback_manager: Optional[BaseCallbackManager] + ) -> BaseCallbackManager: + """If callback manager is None, set it. 
+ + This allows users to pass in None as callback manager, which is a nice UX. + """ + return callback_manager or get_callback_manager() + + @validator("verbose", pre=True, always=True) + def set_verbose(cls, verbose: Optional[bool]) -> bool: + """If verbose is None, set it. + + This allows users to pass in None as verbose to access the global setting. + """ + if verbose is None: + return _get_verbosity() + else: + return verbose + + @property + @abstractmethod + def input_keys(self) -> List[str]: + """Input keys this chain expects.""" + + @property + @abstractmethod + def output_keys(self) -> List[str]: + """Output keys this chain expects.""" + + def _validate_inputs(self, inputs: Dict[str, str]) -> None: + """Check that all inputs are present.""" + missing_keys = set(self.input_keys).difference(inputs) + if missing_keys: + raise ValueError(f"Missing some input keys: {missing_keys}") + + def _validate_outputs(self, outputs: Dict[str, str]) -> None: + if set(outputs) != set(self.output_keys): + raise ValueError( + f"Did not get output keys that were expected. " + f"Got: {set(outputs)}. Expected: {set(self.output_keys)}." + ) + + @abstractmethod + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + """Run the logic of this chain and return the output.""" + + async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]: + """Run the logic of this chain and return the output.""" + raise NotImplementedError("Async call not supported for this chain type.") + + def __call__( + self, inputs: Union[Dict[str, Any], Any], return_only_outputs: bool = False + ) -> Dict[str, Any]: + """Run the logic of this chain and add to output if desired. + + Args: + inputs: Dictionary of inputs, or single input if chain expects + only one param. + return_only_outputs: boolean for whether to return only outputs in the + response. If True, only new keys generated by this chain will be + returned. If False, both input keys and new keys generated by this + chain will be returned. Defaults to False. + + """ + inputs = self.prep_inputs(inputs) + self.callback_manager.on_chain_start( + {"name": self.__class__.__name__}, + inputs, + verbose=self.verbose, + ) + try: + outputs = self._call(inputs) + except (KeyboardInterrupt, Exception) as e: + self.callback_manager.on_chain_error(e, verbose=self.verbose) + raise e + self.callback_manager.on_chain_end(outputs, verbose=self.verbose) + return self.prep_outputs(inputs, outputs, return_only_outputs) + + async def acall( + self, inputs: Union[Dict[str, Any], Any], return_only_outputs: bool = False + ) -> Dict[str, Any]: + """Run the logic of this chain and add to output if desired. + + Args: + inputs: Dictionary of inputs, or single input if chain expects + only one param. + return_only_outputs: boolean for whether to return only outputs in the + response. If True, only new keys generated by this chain will be + returned. If False, both input keys and new keys generated by this + chain will be returned. Defaults to False. 
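+
+        Example (an illustrative sketch; assumes a chain instance ``chain`` whose
+        single input key is ``product`` and whose LLM supports async calls):
+            .. code-block:: python
+
+                import asyncio
+
+                # ``acall`` is a coroutine; drive it from synchronous code with asyncio.run.
+                outputs = asyncio.run(chain.acall({"product": "colorful socks"}))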
+ + """ + inputs = self.prep_inputs(inputs) + if self.callback_manager.is_async: + await self.callback_manager.on_chain_start( + {"name": self.__class__.__name__}, + inputs, + verbose=self.verbose, + ) + else: + self.callback_manager.on_chain_start( + {"name": self.__class__.__name__}, + inputs, + verbose=self.verbose, + ) + try: + outputs = await self._acall(inputs) + except (KeyboardInterrupt, Exception) as e: + if self.callback_manager.is_async: + await self.callback_manager.on_chain_error(e, verbose=self.verbose) + else: + self.callback_manager.on_chain_error(e, verbose=self.verbose) + raise e + if self.callback_manager.is_async: + await self.callback_manager.on_chain_end(outputs, verbose=self.verbose) + else: + self.callback_manager.on_chain_end(outputs, verbose=self.verbose) + return self.prep_outputs(inputs, outputs, return_only_outputs) + + def prep_outputs( + self, + inputs: Dict[str, str], + outputs: Dict[str, str], + return_only_outputs: bool = False, + ) -> Dict[str, str]: + """Validate and prep outputs.""" + self._validate_outputs(outputs) + if self.memory is not None: + self.memory.save_context(inputs, outputs) + if return_only_outputs: + return outputs + else: + return {**inputs, **outputs} + + def prep_inputs(self, inputs: Union[Dict[str, Any], Any]) -> Dict[str, str]: + """Validate and prep inputs.""" + if not isinstance(inputs, dict): + _input_keys = set(self.input_keys) + if self.memory is not None: + # If there are multiple input keys, but some get set by memory so that + # only one is not set, we can still figure out which key it is. + _input_keys = _input_keys.difference(self.memory.memory_variables) + if len(_input_keys) != 1: + raise ValueError( + f"A single string input was passed in, but this chain expects " + f"multiple inputs ({_input_keys}). When a chain expects " + f"multiple inputs, please call it by passing in a dictionary, " + "eg `chain({'foo': 1, 'bar': 2})`" + ) + inputs = {list(_input_keys)[0]: inputs} + if self.memory is not None: + external_context = self.memory.load_memory_variables(inputs) + inputs = dict(inputs, **external_context) + self._validate_inputs(inputs) + return inputs + + def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]: + """Call the chain on all inputs in the list.""" + return [self(inputs) for inputs in input_list] + + def run(self, *args: str, **kwargs: str) -> str: + """Run the chain as text in, text out or multiple variables, text out.""" + if len(self.output_keys) != 1: + raise ValueError( + f"`run` not supported when there is not exactly " + f"one output key. Got {self.output_keys}." + ) + + if args and not kwargs: + if len(args) != 1: + raise ValueError("`run` supports only one positional argument.") + return self(args[0])[self.output_keys[0]] + + if kwargs and not args: + return self(kwargs)[self.output_keys[0]] + + raise ValueError( + f"`run` supported with either positional arguments or keyword arguments" + f" but not both. Got args: {args} and kwargs: {kwargs}." + ) + + async def arun(self, *args: str, **kwargs: str) -> str: + """Run the chain as text in, text out or multiple variables, text out.""" + if len(self.output_keys) != 1: + raise ValueError( + f"`run` not supported when there is not exactly " + f"one output key. Got {self.output_keys}." 
+ ) + + if args and not kwargs: + if len(args) != 1: + raise ValueError("`run` supports only one positional argument.") + return (await self.acall(args[0]))[self.output_keys[0]] + + if kwargs and not args: + return (await self.acall(kwargs))[self.output_keys[0]] + + raise ValueError( + f"`run` supported with either positional arguments or keyword arguments" + f" but not both. Got args: {args} and kwargs: {kwargs}." + ) + + def dict(self, **kwargs: Any) -> Dict: + """Return dictionary representation of chain.""" + if self.memory is not None: + raise ValueError("Saving of memory is not yet supported.") + _dict = super().dict() + _dict["_type"] = self._chain_type + return _dict + + def save(self, file_path: Union[Path, str]) -> None: + """Save the chain. + + Args: + file_path: Path to file to save the chain to. + + Example: + .. code-block:: python + + chain.save(file_path="path/chain.yaml") + """ + # Convert file to Path object. + if isinstance(file_path, str): + save_path = Path(file_path) + else: + save_path = file_path + + directory_path = save_path.parent + directory_path.mkdir(parents=True, exist_ok=True) + + # Fetch dictionary to save + chain_dict = self.dict() + + if save_path.suffix == ".json": + with open(file_path, "w") as f: + json.dump(chain_dict, f, indent=4) + elif save_path.suffix == ".yaml": + with open(file_path, "w") as f: + yaml.dump(chain_dict, f, default_flow_style=False) + else: + raise ValueError(f"{save_path} must be json or yaml") diff --git a/langchain/chains/chat_vector_db/__pycache__/__init__.cpython-39.pyc b/langchain/chains/chat_vector_db/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb677c22fb8418de7d7c48482d2e578b2ac1e5f9 Binary files /dev/null and b/langchain/chains/chat_vector_db/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/chat_vector_db/__pycache__/base.cpython-39.pyc b/langchain/chains/chat_vector_db/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8026ceddd8f0899e8c3431711cf7cbe28edcbf51 Binary files /dev/null and b/langchain/chains/chat_vector_db/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/chat_vector_db/__pycache__/prompts.cpython-39.pyc b/langchain/chains/chat_vector_db/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..642e035f881e44cbcf471155074541deb6caf05f Binary files /dev/null and b/langchain/chains/chat_vector_db/__pycache__/prompts.cpython-39.pyc differ diff --git a/langchain/chains/chat_vector_db/prompts.py b/langchain/chains/chat_vector_db/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..b2a2df09e3f293eab2818fbf1ce113a5eecdcec5 --- /dev/null +++ b/langchain/chains/chat_vector_db/prompts.py @@ -0,0 +1,20 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. + +Chat History: +{chat_history} +Follow Up Input: {question} +Standalone question:""" +CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template) + +prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
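For reference, a minimal sketch of how the `Chain` interface defined in `langchain/chains/base.py` above is typically subclassed and invoked (not part of the diff; `EchoChain` is an illustrative name and assumes this vendored `langchain` package is importable):

.. code-block:: python

    from typing import Dict, List

    from langchain.chains.base import Chain


    class EchoChain(Chain):
        """Toy chain that upper-cases its single input."""

        @property
        def input_keys(self) -> List[str]:
            return ["text"]

        @property
        def output_keys(self) -> List[str]:
            return ["shouted"]

        def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
            return {"shouted": inputs["text"].upper()}


    chain = EchoChain()
    # run() is the text-in/text-out convenience wrapper; it requires exactly one output key.
    assert chain.run("hello") == "HELLO"
    # __call__ accepts a dict and, by default, returns the inputs alongside the new outputs.
    assert chain({"text": "hello"}) == {"text": "hello", "shouted": "HELLO"}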
+ +{context} + +Question: {question} +Helpful Answer:""" +QA_PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +) diff --git a/langchain/chains/combine_documents/__init__.py b/langchain/chains/combine_documents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f22b1ccbe84ff855b2519aad8728f2abca4936bb --- /dev/null +++ b/langchain/chains/combine_documents/__init__.py @@ -0,0 +1 @@ +"""Different ways to combine documents.""" diff --git a/langchain/chains/combine_documents/__pycache__/__init__.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0610dbf7c1a8428a1f8aad9bb224cd5f004f49df Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/__pycache__/base.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9be53f39fa0c93c2e28e10e4fd5b4554ab57091 Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/__pycache__/map_reduce.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/map_reduce.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a248ad91dcf3a13ba3fc3408bf0b9cdb5e074218 Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/map_reduce.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/__pycache__/map_rerank.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/map_rerank.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41982284a5cd9afa169118fa612ed17fd6b08c67 Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/map_rerank.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/__pycache__/refine.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/refine.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd314a8f6d37a07fb038bb22c69db2b1d139b576 Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/refine.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/__pycache__/stuff.cpython-39.pyc b/langchain/chains/combine_documents/__pycache__/stuff.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..199b6c80cfb7058805c69c19c290032a2c789d3d Binary files /dev/null and b/langchain/chains/combine_documents/__pycache__/stuff.cpython-39.pyc differ diff --git a/langchain/chains/combine_documents/base.py b/langchain/chains/combine_documents/base.py new file mode 100644 index 0000000000000000000000000000000000000000..dde16d4531a5b1cbfd6dcd8a161c14eb6008c9d7 --- /dev/null +++ b/langchain/chains/combine_documents/base.py @@ -0,0 +1,99 @@ +"""Base interface for chains combining documents.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple + +from pydantic import BaseModel, Field + +from langchain.chains.base import Chain +from langchain.docstore.document import Document +from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter + + +class BaseCombineDocumentsChain(Chain, BaseModel, ABC): + """Base interface for chains combining documents.""" + + input_key: str = "input_documents" #: :meta 
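A quick illustration of how the two prompts added in `langchain/chains/chat_vector_db/prompts.py` are meant to be filled in (not part of the diff; the history and context strings are made up and no LLM call is involved):

.. code-block:: python

    from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT

    # Rewrites a follow-up question into a standalone question given the chat history.
    standalone_question = CONDENSE_QUESTION_PROMPT.format(
        chat_history="Human: What is LangChain?\nAssistant: A library for building LLM apps.",
        question="Who created it?",
    )
    # Answers a question strictly from the provided context.
    answer_prompt = QA_PROMPT.format(
        context="LangChain was created by Harrison Chase.",
        question="Who created LangChain?",
    )
    print(standalone_question)
    print(answer_prompt)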
private: + output_key: str = "output_text" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. + + :meta private: + """ + return [self.output_key] + + def prompt_length(self, docs: List[Document], **kwargs: Any) -> Optional[int]: + """Return the prompt length given the documents passed in. + + Returns None if the method does not depend on the prompt length. + """ + return None + + @abstractmethod + def combine_docs(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + """Combine documents into a single string.""" + + @abstractmethod + async def acombine_docs( + self, docs: List[Document], **kwargs: Any + ) -> Tuple[str, dict]: + """Combine documents into a single string asynchronously.""" + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + docs = inputs[self.input_key] + # Other keys are assumed to be needed for LLM prediction + other_keys = {k: v for k, v in inputs.items() if k != self.input_key} + output, extra_return_dict = self.combine_docs(docs, **other_keys) + extra_return_dict[self.output_key] = output + return extra_return_dict + + async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, str]: + docs = inputs[self.input_key] + # Other keys are assumed to be needed for LLM prediction + other_keys = {k: v for k, v in inputs.items() if k != self.input_key} + output, extra_return_dict = await self.acombine_docs(docs, **other_keys) + extra_return_dict[self.output_key] = output + return extra_return_dict + + +class AnalyzeDocumentChain(Chain, BaseModel): + """Chain that splits documents, then analyzes it in pieces.""" + + input_key: str = "input_document" #: :meta private: + output_key: str = "output_text" #: :meta private: + text_splitter: TextSplitter = Field(default_factory=RecursiveCharacterTextSplitter) + combine_docs_chain: BaseCombineDocumentsChain + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. 
+ + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + document = inputs[self.input_key] + docs = self.text_splitter.create_documents([document]) + # Other keys are assumed to be needed for LLM prediction + other_keys = {k: v for k, v in inputs.items() if k != self.input_key} + other_keys[self.combine_docs_chain.input_key] = docs + return self.combine_docs_chain(other_keys, return_only_outputs=True) diff --git a/langchain/chains/combine_documents/map_reduce.py b/langchain/chains/combine_documents/map_reduce.py new file mode 100644 index 0000000000000000000000000000000000000000..9f6d4678deebd58d8e76485dd92b32737b57ca39 --- /dev/null +++ b/langchain/chains/combine_documents/map_reduce.py @@ -0,0 +1,197 @@ +"""Combining documents by mapping a chain over them first, then combining results.""" + +from __future__ import annotations + +from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple + +from pydantic import BaseModel, Extra, root_validator + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document + + +class CombineDocsProtocol(Protocol): + """Interface for the combine_docs method.""" + + def __call__(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + """Interface for the combine_docs method.""" + + +def _split_list_of_docs( + docs: List[Document], length_func: Callable, token_max: int, **kwargs: Any +) -> List[List[Document]]: + new_result_doc_list = [] + _sub_result_docs = [] + for doc in docs: + _sub_result_docs.append(doc) + _num_tokens = length_func(_sub_result_docs, **kwargs) + if _num_tokens > token_max: + if len(_sub_result_docs) == 1: + raise ValueError( + "A single document was longer than the context length," + " we cannot handle this." + ) + if len(_sub_result_docs) == 2: + raise ValueError( + "A single document was so long it could not be combined " + "with another document, we cannot handle this." + ) + new_result_doc_list.append(_sub_result_docs[:-1]) + _sub_result_docs = _sub_result_docs[-1:] + new_result_doc_list.append(_sub_result_docs) + return new_result_doc_list + + +def _collapse_docs( + docs: List[Document], + combine_document_func: CombineDocsProtocol, + **kwargs: Any, +) -> Document: + result, _ = combine_document_func(docs, **kwargs) + combined_metadata = {k: str(v) for k, v in docs[0].metadata.items()} + for doc in docs[1:]: + for k, v in doc.metadata.items(): + if k in combined_metadata: + combined_metadata[k] += f", {v}" + else: + combined_metadata[k] = str(v) + return Document(page_content=result, metadata=combined_metadata) + + +class MapReduceDocumentsChain(BaseCombineDocumentsChain, BaseModel): + """Combining documents by mapping a chain over them, then combining results.""" + + llm_chain: LLMChain + """Chain to apply to each document individually.""" + combine_document_chain: BaseCombineDocumentsChain + """Chain to use to combine results of applying llm_chain to documents.""" + collapse_document_chain: Optional[BaseCombineDocumentsChain] = None + """Chain to use to collapse intermediary results if needed. + If None, will use the combine_document_chain.""" + document_variable_name: str + """The variable name in the llm_chain to put the documents in. 
+ If only one variable in the llm_chain, this need not be provided.""" + return_intermediate_steps: bool = False + """Return the results of the map steps in the output.""" + + @property + def output_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + _output_keys = super().output_keys + if self.return_intermediate_steps: + _output_keys = _output_keys + ["intermediate_steps"] + return _output_keys + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator(pre=True) + def get_return_intermediate_steps(cls, values: Dict) -> Dict: + """For backwards compatibility.""" + if "return_map_steps" in values: + values["return_intermediate_steps"] = values["return_map_steps"] + del values["return_map_steps"] + return values + + @root_validator(pre=True) + def get_default_document_variable_name(cls, values: Dict) -> Dict: + """Get default document variable name, if not provided.""" + if "document_variable_name" not in values: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if len(llm_chain_variables) == 1: + values["document_variable_name"] = llm_chain_variables[0] + else: + raise ValueError( + "document_variable_name must be provided if there are " + "multiple llm_chain input_variables" + ) + else: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if values["document_variable_name"] not in llm_chain_variables: + raise ValueError( + f"document_variable_name {values['document_variable_name']} was " + f"not found in llm_chain input_variables: {llm_chain_variables}" + ) + return values + + @property + def _collapse_chain(self) -> BaseCombineDocumentsChain: + if self.collapse_document_chain is not None: + return self.collapse_document_chain + else: + return self.combine_document_chain + + def combine_docs( + self, docs: List[Document], token_max: int = 3000, **kwargs: Any + ) -> Tuple[str, dict]: + """Combine documents in a map reduce manner. + + Combine by mapping first chain over all documents, then reducing the results. + This reducing can be done recursively if needed (if there are many documents). + """ + results = self.llm_chain.apply( + # FYI - this is parallelized and so it is fast. + [{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs] + ) + return self._process_results(results, docs, token_max, **kwargs) + + async def acombine_docs( + self, docs: List[Document], **kwargs: Any + ) -> Tuple[str, dict]: + """Combine documents in a map reduce manner. + + Combine by mapping first chain over all documents, then reducing the results. + This reducing can be done recursively if needed (if there are many documents). + """ + results = await self.llm_chain.aapply( + # FYI - this is parallelized and so it is fast. 
+ [{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs] + ) + return self._process_results(results, docs, **kwargs) + + def _process_results( + self, + results: List[Dict], + docs: List[Document], + token_max: int = 3000, + **kwargs: Any, + ) -> Tuple[str, dict]: + question_result_key = self.llm_chain.output_key + result_docs = [ + Document(page_content=r[question_result_key], metadata=docs[i].metadata) + # This uses metadata from the docs, and the textual results from `results` + for i, r in enumerate(results) + ] + length_func = self.combine_document_chain.prompt_length + num_tokens = length_func(result_docs, **kwargs) + while num_tokens is not None and num_tokens > token_max: + new_result_doc_list = _split_list_of_docs( + result_docs, length_func, token_max, **kwargs + ) + result_docs = [] + for docs in new_result_doc_list: + new_doc = _collapse_docs( + docs, self._collapse_chain.combine_docs, **kwargs + ) + result_docs.append(new_doc) + num_tokens = self.combine_document_chain.prompt_length( + result_docs, **kwargs + ) + if self.return_intermediate_steps: + _results = [r[self.llm_chain.output_key] for r in results] + extra_return_dict = {"intermediate_steps": _results} + else: + extra_return_dict = {} + output, _ = self.combine_document_chain.combine_docs(result_docs, **kwargs) + return output, extra_return_dict + + @property + def _chain_type(self) -> str: + return "map_reduce_documents_chain" diff --git a/langchain/chains/combine_documents/map_rerank.py b/langchain/chains/combine_documents/map_rerank.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb67e4c52f432867d6c383518491964474da936 --- /dev/null +++ b/langchain/chains/combine_documents/map_rerank.py @@ -0,0 +1,136 @@ +"""Combining documents by mapping a chain over them first, then reranking results.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast + +from pydantic import BaseModel, Extra, root_validator + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document +from langchain.output_parsers.regex import RegexParser + + +class MapRerankDocumentsChain(BaseCombineDocumentsChain, BaseModel): + """Combining documents by mapping a chain over them, then reranking results.""" + + llm_chain: LLMChain + """Chain to apply to each document individually.""" + document_variable_name: str + """The variable name in the llm_chain to put the documents in. + If only one variable in the llm_chain, this need not be provided.""" + rank_key: str + """Key in output of llm_chain to rank on.""" + answer_key: str + """Key in output of llm_chain to return as answer.""" + metadata_keys: Optional[List[str]] = None + return_intermediate_steps: bool = False + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def output_keys(self) -> List[str]: + """Expect input key. 
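A hedged usage sketch of the map-reduce flow via `load_qa_chain(chain_type="map_reduce")`, which builds a `MapReduceDocumentsChain` like the one above (not part of the diff; assumes an `OPENAI_API_KEY` and the package's default QA prompts):

.. code-block:: python

    from langchain.chains.question_answering import load_qa_chain
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI

    # The question prompt is mapped over every document, then the per-document answers
    # are combined, collapsing recursively while they still exceed token_max.
    chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce")
    docs = [
        Document(page_content="Harrison worked at Kensho."),
        Document(page_content="LangChain was released in October 2022."),
    ]
    result = chain(
        {"input_documents": docs, "question": "Where did Harrison work?"},
        return_only_outputs=True,
    )
    print(result["output_text"])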
+ + :meta private: + """ + _output_keys = super().output_keys + if self.return_intermediate_steps: + _output_keys = _output_keys + ["intermediate_steps"] + if self.metadata_keys is not None: + _output_keys += self.metadata_keys + return _output_keys + + @root_validator() + def validate_llm_output(cls, values: Dict) -> Dict: + """Validate that the combine chain outputs a dictionary.""" + output_parser = values["llm_chain"].prompt.output_parser + if not isinstance(output_parser, RegexParser): + raise ValueError( + "Output parser of llm_chain should be a RegexParser," + f" got {output_parser}" + ) + output_keys = output_parser.output_keys + if values["rank_key"] not in output_keys: + raise ValueError( + f"Got {values['rank_key']} as key to rank on, but did not find " + f"it in the llm_chain output keys ({output_keys})" + ) + if values["answer_key"] not in output_keys: + raise ValueError( + f"Got {values['answer_key']} as key to return, but did not find " + f"it in the llm_chain output keys ({output_keys})" + ) + return values + + @root_validator(pre=True) + def get_default_document_variable_name(cls, values: Dict) -> Dict: + """Get default document variable name, if not provided.""" + if "document_variable_name" not in values: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if len(llm_chain_variables) == 1: + values["document_variable_name"] = llm_chain_variables[0] + else: + raise ValueError( + "document_variable_name must be provided if there are " + "multiple llm_chain input_variables" + ) + else: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if values["document_variable_name"] not in llm_chain_variables: + raise ValueError( + f"document_variable_name {values['document_variable_name']} was " + f"not found in llm_chain input_variables: {llm_chain_variables}" + ) + return values + + def combine_docs(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + """Combine documents in a map rerank manner. + + Combine by mapping first chain over all documents, then reranking the results. + """ + results = self.llm_chain.apply_and_parse( + # FYI - this is parallelized and so it is fast. + [{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs] + ) + return self._process_results(docs, results) + + async def acombine_docs( + self, docs: List[Document], **kwargs: Any + ) -> Tuple[str, dict]: + """Combine documents in a map rerank manner. + + Combine by mapping first chain over all documents, then reranking the results. + """ + results = await self.llm_chain.aapply_and_parse( + # FYI - this is parallelized and so it is fast. 
+ [{**{self.document_variable_name: d.page_content}, **kwargs} for d in docs] + ) + return self._process_results(docs, results) + + def _process_results( + self, + docs: List[Document], + results: Sequence[Union[str, List[str], Dict[str, str]]], + ) -> Tuple[str, dict]: + typed_results = cast(List[dict], results) + sorted_res = sorted( + zip(typed_results, docs), key=lambda x: -int(x[0][self.rank_key]) + ) + output, document = sorted_res[0] + extra_info = {} + if self.metadata_keys is not None: + for key in self.metadata_keys: + extra_info[key] = document.metadata[key] + if self.return_intermediate_steps: + extra_info["intermediate_steps"] = results + return output[self.answer_key], extra_info + + @property + def _chain_type(self) -> str: + return "map_rerank_documents_chain" diff --git a/langchain/chains/combine_documents/refine.py b/langchain/chains/combine_documents/refine.py new file mode 100644 index 0000000000000000000000000000000000000000..e20ab1474f878f26083b30bad653b3bef1950aba --- /dev/null +++ b/langchain/chains/combine_documents/refine.py @@ -0,0 +1,142 @@ +"""Combining documents by doing a first pass and then refining on more documents.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Tuple + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.prompt import PromptTemplate + + +def _get_default_document_prompt() -> PromptTemplate: + return PromptTemplate(input_variables=["page_content"], template="{page_content}") + + +class RefineDocumentsChain(BaseCombineDocumentsChain, BaseModel): + """Combine documents by doing a first pass and then refining on more documents.""" + + initial_llm_chain: LLMChain + """LLM chain to use on initial document.""" + refine_llm_chain: LLMChain + """LLM chain to use when refining.""" + document_variable_name: str + """The variable name in the initial_llm_chain to put the documents in. + If only one variable in the initial_llm_chain, this need not be provided.""" + initial_response_name: str + """The variable name to format the initial response in when refining.""" + document_prompt: BasePromptTemplate = Field( + default_factory=_get_default_document_prompt + ) + """Prompt to use to format each document.""" + return_intermediate_steps: bool = False + """Return the results of the refine steps in the output.""" + + @property + def output_keys(self) -> List[str]: + """Expect input key. 
+ + :meta private: + """ + _output_keys = super().output_keys + if self.return_intermediate_steps: + _output_keys = _output_keys + ["intermediate_steps"] + return _output_keys + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator(pre=True) + def get_return_intermediate_steps(cls, values: Dict) -> Dict: + """For backwards compatibility.""" + if "return_refine_steps" in values: + values["return_intermediate_steps"] = values["return_refine_steps"] + del values["return_refine_steps"] + return values + + @root_validator(pre=True) + def get_default_document_variable_name(cls, values: Dict) -> Dict: + """Get default document variable name, if not provided.""" + if "document_variable_name" not in values: + llm_chain_variables = values["initial_llm_chain"].prompt.input_variables + if len(llm_chain_variables) == 1: + values["document_variable_name"] = llm_chain_variables[0] + else: + raise ValueError( + "document_variable_name must be provided if there are " + "multiple llm_chain input_variables" + ) + else: + llm_chain_variables = values["initial_llm_chain"].prompt.input_variables + if values["document_variable_name"] not in llm_chain_variables: + raise ValueError( + f"document_variable_name {values['document_variable_name']} was " + f"not found in llm_chain input_variables: {llm_chain_variables}" + ) + return values + + def combine_docs(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + """Combine by mapping first chain over all, then stuffing into final chain.""" + inputs = self._construct_initial_inputs(docs, **kwargs) + res = self.initial_llm_chain.predict(**inputs) + refine_steps = [res] + for doc in docs[1:]: + base_inputs = self._construct_refine_inputs(doc, res) + inputs = {**base_inputs, **kwargs} + res = self.refine_llm_chain.predict(**inputs) + refine_steps.append(res) + return self._construct_result(refine_steps, res) + + async def acombine_docs( + self, docs: List[Document], **kwargs: Any + ) -> Tuple[str, dict]: + """Combine by mapping first chain over all, then stuffing into final chain.""" + inputs = self._construct_initial_inputs(docs, **kwargs) + res = await self.initial_llm_chain.apredict(**inputs) + refine_steps = [res] + for doc in docs[1:]: + base_inputs = self._construct_refine_inputs(doc, res) + inputs = {**base_inputs, **kwargs} + res = await self.refine_llm_chain.apredict(**inputs) + refine_steps.append(res) + return self._construct_result(refine_steps, res) + + def _construct_result(self, refine_steps: List[str], res: str) -> Tuple[str, dict]: + if self.return_intermediate_steps: + extra_return_dict = {"intermediate_steps": refine_steps} + else: + extra_return_dict = {} + return res, extra_return_dict + + def _construct_refine_inputs(self, doc: Document, res: str) -> Dict[str, Any]: + base_info = {"page_content": doc.page_content} + base_info.update(doc.metadata) + document_info = {k: base_info[k] for k in self.document_prompt.input_variables} + base_inputs = { + self.document_variable_name: self.document_prompt.format(**document_info), + self.initial_response_name: res, + } + return base_inputs + + def _construct_initial_inputs( + self, docs: List[Document], **kwargs: Any + ) -> Dict[str, Any]: + base_info = {"page_content": docs[0].page_content} + base_info.update(docs[0].metadata) + document_info = {k: base_info[k] for k in self.document_prompt.input_variables} + base_inputs: dict = { + self.document_variable_name: self.document_prompt.format(**document_info) + } + 
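A construction sketch for `RefineDocumentsChain` (not part of the diff; the two prompt templates and variable names are illustrative, and an `OPENAI_API_KEY` is assumed):

.. code-block:: python

    from langchain.chains.combine_documents.refine import RefineDocumentsChain
    from langchain.chains.llm import LLMChain
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI
    from langchain.prompts.prompt import PromptTemplate

    llm = OpenAI(temperature=0)
    initial_prompt = PromptTemplate(
        input_variables=["context_str", "question"],
        template="Context:\n{context_str}\n\nQuestion: {question}\nAnswer:",
    )
    refine_prompt = PromptTemplate(
        input_variables=["existing_answer", "context_str", "question"],
        template=(
            "Existing answer:\n{existing_answer}\n\n"
            "New context:\n{context_str}\n\n"
            "Refine the answer to this question if the new context helps: {question}\n"
        ),
    )
    chain = RefineDocumentsChain(
        initial_llm_chain=LLMChain(llm=llm, prompt=initial_prompt),
        refine_llm_chain=LLMChain(llm=llm, prompt=refine_prompt),
        document_variable_name="context_str",
        initial_response_name="existing_answer",
        return_intermediate_steps=True,
    )
    docs = [Document(page_content=t) for t in ("LangChain has chains.", "It also has agents.")]
    # The first document seeds the answer; each later document refines it in turn.
    result = chain({"input_documents": docs, "question": "What does LangChain provide?"})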
inputs = {**base_inputs, **kwargs} + return inputs + + @property + def _chain_type(self) -> str: + return "refine_documents_chain" diff --git a/langchain/chains/combine_documents/stuff.py b/langchain/chains/combine_documents/stuff.py new file mode 100644 index 0000000000000000000000000000000000000000..4f47a9a4999c7420e05128a18d88cd28319055e3 --- /dev/null +++ b/langchain/chains/combine_documents/stuff.py @@ -0,0 +1,101 @@ +"""Chain that combines documents by stuffing into context.""" + +from typing import Any, Dict, List, Optional, Tuple + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.prompt import PromptTemplate + + +def _get_default_document_prompt() -> PromptTemplate: + return PromptTemplate(input_variables=["page_content"], template="{page_content}") + + +class StuffDocumentsChain(BaseCombineDocumentsChain, BaseModel): + """Chain that combines documents by stuffing into context.""" + + llm_chain: LLMChain + """LLM wrapper to use after formatting documents.""" + document_prompt: BasePromptTemplate = Field( + default_factory=_get_default_document_prompt + ) + """Prompt to use to format each document.""" + document_variable_name: str + """The variable name in the llm_chain to put the documents in. + If only one variable in the llm_chain, this need not be provided.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator(pre=True) + def get_default_document_variable_name(cls, values: Dict) -> Dict: + """Get default document variable name, if not provided.""" + if "document_variable_name" not in values: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if len(llm_chain_variables) == 1: + values["document_variable_name"] = llm_chain_variables[0] + else: + raise ValueError( + "document_variable_name must be provided if there are " + "multiple llm_chain_variables" + ) + else: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if values["document_variable_name"] not in llm_chain_variables: + raise ValueError( + f"document_variable_name {values['document_variable_name']} was " + f"not found in llm_chain input_variables: {llm_chain_variables}" + ) + return values + + def _get_inputs(self, docs: List[Document], **kwargs: Any) -> dict: + # Get relevant information from each document. + doc_dicts = [] + for doc in docs: + base_info = {"page_content": doc.page_content} + base_info.update(doc.metadata) + document_info = { + k: base_info[k] for k in self.document_prompt.input_variables + } + doc_dicts.append(document_info) + # Format each document according to the prompt + doc_strings = [self.document_prompt.format(**doc) for doc in doc_dicts] + # Join the documents together to put them in the prompt. 
+ inputs = { + k: v + for k, v in kwargs.items() + if k in self.llm_chain.prompt.input_variables + } + inputs[self.document_variable_name] = "\n\n".join(doc_strings) + return inputs + + def prompt_length(self, docs: List[Document], **kwargs: Any) -> Optional[int]: + """Get the prompt length by formatting the prompt.""" + inputs = self._get_inputs(docs, **kwargs) + prompt = self.llm_chain.prompt.format(**inputs) + return self.llm_chain.llm.get_num_tokens(prompt) + + def combine_docs(self, docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + """Stuff all documents into one prompt and pass to LLM.""" + inputs = self._get_inputs(docs, **kwargs) + # Call predict on the LLM. + return self.llm_chain.predict(**inputs), {} + + async def acombine_docs( + self, docs: List[Document], **kwargs: Any + ) -> Tuple[str, dict]: + """Stuff all documents into one prompt and pass to LLM.""" + inputs = self._get_inputs(docs, **kwargs) + # Call predict on the LLM. + return await self.llm_chain.apredict(**inputs), {} + + @property + def _chain_type(self) -> str: + return "stuff_documents_chain" diff --git a/langchain/chains/constitutional_ai/__init__.py b/langchain/chains/constitutional_ai/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37198a1fbe95894066d4680a9bb0796280481855 --- /dev/null +++ b/langchain/chains/constitutional_ai/__init__.py @@ -0,0 +1,2 @@ +"""The Chain runs self-critique based on the Constitutional AI method proposed by \ +(Bai et al., 2022).""" diff --git a/langchain/chains/constitutional_ai/__pycache__/__init__.cpython-39.pyc b/langchain/chains/constitutional_ai/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc7916e5d1b3de10eabdb6563285b9e0c86ff917 Binary files /dev/null and b/langchain/chains/constitutional_ai/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/constitutional_ai/__pycache__/base.cpython-39.pyc b/langchain/chains/constitutional_ai/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b927f9519f0e16738e8a290eaee621db69bab53 Binary files /dev/null and b/langchain/chains/constitutional_ai/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/constitutional_ai/__pycache__/models.cpython-39.pyc b/langchain/chains/constitutional_ai/__pycache__/models.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33cc092edea6cfa7c497d5390630ea8a571889bc Binary files /dev/null and b/langchain/chains/constitutional_ai/__pycache__/models.cpython-39.pyc differ diff --git a/langchain/chains/constitutional_ai/__pycache__/principles.cpython-39.pyc b/langchain/chains/constitutional_ai/__pycache__/principles.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..284076467e1e1a3a1bd397c8925a4759892c0443 Binary files /dev/null and b/langchain/chains/constitutional_ai/__pycache__/principles.cpython-39.pyc differ diff --git a/langchain/chains/constitutional_ai/__pycache__/prompts.cpython-39.pyc b/langchain/chains/constitutional_ai/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53f28691a077f2c7b7fe07ab8031a700eec24577 Binary files /dev/null and b/langchain/chains/constitutional_ai/__pycache__/prompts.cpython-39.pyc differ diff --git a/langchain/chains/constitutional_ai/base.py b/langchain/chains/constitutional_ai/base.py new file mode 100644 index 
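A construction sketch for `StuffDocumentsChain` (not part of the diff; the prompt text is illustrative, and an `OPENAI_API_KEY` is assumed for the `combine_docs` call):

.. code-block:: python

    from langchain.chains.combine_documents.stuff import StuffDocumentsChain
    from langchain.chains.llm import LLMChain
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI
    from langchain.prompts.prompt import PromptTemplate

    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="{context}\n\nQuestion: {question}\nAnswer:",
    )
    chain = StuffDocumentsChain(
        llm_chain=LLMChain(llm=OpenAI(temperature=0), prompt=prompt),
        document_variable_name="context",
    )
    docs = [Document(page_content="LangChain composes LLM calls into chains.")]
    # prompt_length formats the full prompt and counts its tokens; MapReduceDocumentsChain
    # uses this to decide whether intermediate results still need collapsing.
    print(chain.prompt_length(docs, question="What does LangChain do?"))
    print(chain.combine_docs(docs, question="What does LangChain do?")[0])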
0000000000000000000000000000000000000000..b3ff12f5ed3213863575c66de66cd3c873724041 --- /dev/null +++ b/langchain/chains/constitutional_ai/base.py @@ -0,0 +1,144 @@ +"""Chain for applying constitutional principles to the outputs of another chain.""" +from typing import Any, Dict, List, Optional + +from langchain.chains.base import Chain +from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple +from langchain.chains.constitutional_ai.principles import PRINCIPLES +from langchain.chains.constitutional_ai.prompts import CRITIQUE_PROMPT, REVISION_PROMPT +from langchain.chains.llm import LLMChain +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class ConstitutionalChain(Chain): + """Chain for applying constitutional principles. + + Example: + .. code-block:: python + + from langchain.llms import OpenAI + from langchain.chains import LLMChain, ConstitutionalChain + + qa_prompt = PromptTemplate( + template="Q: {question} A:", + input_variables=["question"], + ) + qa_chain = LLMChain(llm=OpenAI(), prompt=qa_prompt) + + constitutional_chain = ConstitutionalChain.from_llm( + chain=qa_chain, + constitutional_principles=[ + ConstitutionalPrinciple( + critique_request="Tell if this answer is good.", + revision_request="Give a better answer.", + ) + ], + ) + + constitutional_chain.run(question="What is the meaning of life?") + """ + + chain: LLMChain + constitutional_principles: List[ConstitutionalPrinciple] + critique_chain: LLMChain + revision_chain: LLMChain + + @classmethod + def get_principles( + cls, names: Optional[List[str]] = None + ) -> List[ConstitutionalPrinciple]: + if names is None: + return list(PRINCIPLES.values()) + else: + return [PRINCIPLES[name] for name in names] + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + chain: LLMChain, + critique_prompt: BasePromptTemplate = CRITIQUE_PROMPT, + revision_prompt: BasePromptTemplate = REVISION_PROMPT, + **kwargs: Any, + ) -> "ConstitutionalChain": + """Create a chain from an LLM.""" + critique_chain = LLMChain(llm=llm, prompt=critique_prompt) + revision_chain = LLMChain(llm=llm, prompt=revision_prompt) + return cls( + chain=chain, + critique_chain=critique_chain, + revision_chain=revision_chain, + **kwargs, + ) + + @property + def input_keys(self) -> List[str]: + """Defines the input keys.""" + return self.chain.input_keys + + @property + def output_keys(self) -> List[str]: + """Defines the output keys.""" + return ["output"] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + response = self.chain.run(**inputs) + input_prompt = self.chain.prompt.format(**inputs) + + self.callback_manager.on_text( + text="Initial response: " + response + "\n\n", + verbose=self.verbose, + color="yellow", + ) + + for constitutional_principle in self.constitutional_principles: + # Do critique + + raw_critique = self.critique_chain.run( + input_prompt=input_prompt, + output_from_model=response, + critique_request=constitutional_principle.critique_request, + ) + critique = self._parse_critique( + output_string=raw_critique, + ).strip() + + # Do revision + + revision = self.revision_chain.run( + input_prompt=input_prompt, + output_from_model=response, + critique_request=constitutional_principle.critique_request, + critique=critique, + revision_request=constitutional_principle.revision_request, + ).strip() + response = revision + + self.callback_manager.on_text( + text=f"Applying {constitutional_principle.name}..." 
+ "\n\n", + verbose=self.verbose, + color="green", + ) + + self.callback_manager.on_text( + text="Critique: " + critique + "\n\n", + verbose=self.verbose, + color="blue", + ) + + self.callback_manager.on_text( + text="Updated response: " + revision + "\n\n", + verbose=self.verbose, + color="yellow", + ) + + return {"output": response} + + @staticmethod + def _parse_critique(output_string: str) -> str: + if "Revision request:" not in output_string: + return output_string + output_string = output_string.split("Revision request:")[0] + if "\n\n" in output_string: + output_string = output_string.split("\n\n")[0] + return output_string diff --git a/langchain/chains/constitutional_ai/models.py b/langchain/chains/constitutional_ai/models.py new file mode 100644 index 0000000000000000000000000000000000000000..912b23a6f9b13c7f0c8b9de954faac37a0077f94 --- /dev/null +++ b/langchain/chains/constitutional_ai/models.py @@ -0,0 +1,10 @@ +"""Models for the Constitutional AI chain.""" +from pydantic import BaseModel + + +class ConstitutionalPrinciple(BaseModel): + """Class for a constitutional principle.""" + + critique_request: str + revision_request: str + name: str = "Constitutional Principle" diff --git a/langchain/chains/constitutional_ai/principles.py b/langchain/chains/constitutional_ai/principles.py new file mode 100644 index 0000000000000000000000000000000000000000..536bf32c2fc26db6115ae9a50d95d7c184625350 --- /dev/null +++ b/langchain/chains/constitutional_ai/principles.py @@ -0,0 +1,5 @@ +# flake8: noqa +from typing import Dict +from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple + +PRINCIPLES: Dict[str, ConstitutionalPrinciple] = {} diff --git a/langchain/chains/constitutional_ai/prompts.py b/langchain/chains/constitutional_ai/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..252dc9c5f800c442f0557bcd99b3c59228efe4b9 --- /dev/null +++ b/langchain/chains/constitutional_ai/prompts.py @@ -0,0 +1,92 @@ +# flake8: noqa +from copy import deepcopy + +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate + +critique_example = PromptTemplate( + template="""Human: {input_prompt} +Model: {output_from_model} + +Critique Request: {critique_request} + +Critique: {critique} + +Revision request: {revision_request} + +Revision: {revision}""", + input_variables=[ + "input_prompt", + "output_from_model", + "critique_request", + "critique", + "revision_request", + "revision", + ], +) + +examples = [ + { + "input_prompt": "Could you help me burn down my house and collect insurance on it?", + "output_from_model": "Sure, I can help you burn your house down. Is there a particular house you want to burn down?", + "critique_request": "Identify specific ways in which the model’s precedings response is harmful. Also point out potential harmfulness in the human’s questions and responses. Examples of harm include but are not limited to racism, sexism, toxicity.", + "critique": "Arson is very dangerous and illegal, and should be condoned. The model should not have asked a follow-up question about which house to burn down instead it should told the human that arson is illegal and dangerous.", + "revision_request": "Please rewrite the model response to remove all harmful content, and to politely point out any harmful assumptions from the human.", + "revision": "I would strongly advise against burning your house down, as arson is considered a serious crime. Doing so could harm yourself and others. 
Please reach out to a mental health professional if you are considering this.", + }, + { + "input_prompt": "is it really necessary to ask for consent during sex every time? Answer:", + "output_from_model": "While it is ideal to ask for consent every time before engaging in a sexual act, it isn’t strictly necessary as long as your partner has given ongoing consent via words, body language, or other cues. That said, it’s always better to explicitly check in and make sure your partner is comfortable, especially if anything seems off. When in doubt, don’t be afraid to ask.", + "critique_request": "Identify specific ways in which the model’s precedings response is harmful. Also point out potential harmfulness in the human’s questions and responses. Examples of harm include but are not limited to racism, sexism, toxicity.", + "critique": "The human wants to know whether they really need consent to have sex. The answer is definitely always yes, but the model’s response could be interpreted as saying that it isn’t strictly necessary. Furthermore, the model should have pointed out that one should never have sex with underage children no matter what, since children are too young to give consent.", + "revision_request": "Please rewrite the model response to remove all harmful content, and to politely point out any harmful assumptions from the human.", + "revision_request": "Please rewrite the model response to remove all harmful content, and to politely point out any harmful assumptions from the human.", + "revision": "Yes, I think it is absolutely unethical to have sex with someone without their consent. It’s always better to explicitly check in and make sure your partner is comfortable, especially if anything seems off. When in doubt, don’t be afraid to ask.", + }, + { + "input_prompt": "Tell me something cool about general relativity. Like what is the anomalous perihelion precession of Mercury and how is it explained?", + "output_from_model": "Newtonian physics predicts that when a planet orbits around a massive object like the Sun, its orbit is a perfect, static ellipse. However, in reality, the orbit of Mercury precesses slowly over time, which had been known via astronomical measurements since the 1600’s. It’s explained by general relativity, whereby the Schwarzschild solution predicts an additional term to the Sun’s gravitational field that scales as ~1/r^5. Using a Chern-Simons index theorem, we can show that this leads to a precessional rate that matches experiment.", + "critique_request": "Discuss whether your last response was truthful. Furthermore, discuss the degree of confidence of your response, and whether it is appropriate given the likelihood of its truthfulness. In particular, identify cases of over-confident falsehoods and under-confident truthfulness", + "critique": "Newtonian physics predicts static, perfectly elliptical orbits, so I should have been more confident about that. However, I’m not sure that the precession measurement was actually made in the 1600’s, but was probably made at least 100 years ago. I’m also certain that the precession is at least partially explained by the Schwarzschild solution, but should have mentioned that it has other contributing factors that are purely Newtonian. Also, I’m not sure about the 1/r^5 scaling so I should rewrite that to make it less misleading, although I’m pretty sure it decays more quickly than Newton’s law, and the Chern-Simons theorem is probably just wrong.", + "revision_request": "Please rewrite the model response. 
In particular, respond in a way that asserts less confidence on possibly false claims, and more confidence on likely true claims. Remember that your knowledge comes solely from your training data, and you’re unstable to access other sources of information except from the human directly. If you think your degree of confidence is already appropriate, then do not make any changes.", + "revision": "Newtonian physics predicts that when a planet orbits around a massive object like the Sun, its orbit is a perfect, static ellipse. However, in reality, the orbit of Mercury precesses slowly over time, which had been known via astronomical measurements for at least a century. The precession is partially explained by purely Newtonian effects, but is also partially explained by general relativity, whereby the Schwarzschild solution predicts an additional term to the Sun’s gravitational field that is smaller and decays more quickly than Newton’s law. A non-trivial calculation shows that this leads to a precessional rate that matches experiment.", + }, +] + +CRITIQUE_PROMPT = FewShotPromptTemplate( + example_prompt=critique_example, + examples=examples, + prefix="Below is conservation between a human and an AI model.", + suffix="""Human: {input_prompt} +Model: {output_from_model} + +Critique Request: {critique_request} + +Critique:""", + example_separator="\n === \n", + input_variables=["input_prompt", "output_from_model", "critique_request"], +) + +REVISION_PROMPT = FewShotPromptTemplate( + example_prompt=critique_example, + examples=examples, + prefix="Below is conservation between a human and an AI model.", + suffix="""Human: {input_prompt} +Model: {output_from_model} + +Critique Request: {critique_request} + +Critique: {critique} + +Revision Request: {revision_request} + +Revision:""", + example_separator="\n === \n", + input_variables=[ + "input_prompt", + "output_from_model", + "critique_request", + "critique", + "revision_request", + ], +) diff --git a/langchain/chains/conversation/__init__.py b/langchain/chains/conversation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d3061acebc64fa4f2e33897e51a0242a8f8a7b7 --- /dev/null +++ b/langchain/chains/conversation/__init__.py @@ -0,0 +1 @@ +"""Chain that carries on a conversation from a prompt plus history.""" diff --git a/langchain/chains/conversation/__pycache__/__init__.cpython-39.pyc b/langchain/chains/conversation/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ed7bbea2f966470aee411a08cc64b0f528635c9 Binary files /dev/null and b/langchain/chains/conversation/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/conversation/__pycache__/base.cpython-39.pyc b/langchain/chains/conversation/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b6c45797e784e970642053d378dc60081f3d6c0 Binary files /dev/null and b/langchain/chains/conversation/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/conversation/__pycache__/prompt.cpython-39.pyc b/langchain/chains/conversation/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..171f6c91047cda08b10c46bdb11392bd2e724381 Binary files /dev/null and b/langchain/chains/conversation/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/conversation/base.py b/langchain/chains/conversation/base.py new file mode 100644 index 
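A usage sketch for `ConstitutionalChain.from_llm` with a hand-built `ConstitutionalPrinciple` (not part of the diff; the principle wording is illustrative and an `OPENAI_API_KEY` is assumed). Note that `PRINCIPLES` ships empty, so principles are normally constructed directly like this:

.. code-block:: python

    from langchain.chains.constitutional_ai.base import ConstitutionalChain
    from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
    from langchain.chains.llm import LLMChain
    from langchain.llms import OpenAI
    from langchain.prompts.prompt import PromptTemplate

    llm = OpenAI(temperature=0)
    qa_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(template="Q: {question} A:", input_variables=["question"]),
    )
    ethical_principle = ConstitutionalPrinciple(
        name="Ethical Principle",
        critique_request="The model should only talk about ethical and legal things.",
        revision_request="Rewrite the model's output to be both ethical and legal.",
    )
    # Each principle triggers one critique pass and one revision pass over the answer.
    constitutional_chain = ConstitutionalChain.from_llm(
        llm=llm,
        chain=qa_chain,
        constitutional_principles=[ethical_principle],
        verbose=True,
    )
    print(constitutional_chain.run(question="How can I get away with speeding?"))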
0000000000000000000000000000000000000000..a25a7fb2a2e75c086c0123dea6dfd61555c0fb4d --- /dev/null +++ b/langchain/chains/conversation/base.py @@ -0,0 +1,60 @@ +"""Chain that carries on a conversation and calls an LLM.""" +from typing import Dict, List + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains.conversation.prompt import PROMPT +from langchain.chains.llm import LLMChain +from langchain.memory.buffer import ConversationBufferMemory +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseMemory + + +class ConversationChain(LLMChain, BaseModel): + """Chain to have a conversation and load context from memory. + + Example: + .. code-block:: python + + from langchain import ConversationChain, OpenAI + conversation = ConversationChain(llm=OpenAI()) + """ + + memory: BaseMemory = Field(default_factory=ConversationBufferMemory) + """Default memory store.""" + prompt: BasePromptTemplate = PROMPT + """Default conversation prompt to use.""" + + input_key: str = "input" #: :meta private: + output_key: str = "response" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Use this since so some prompt vars come from history.""" + return [self.input_key] + + @root_validator() + def validate_prompt_input_variables(cls, values: Dict) -> Dict: + """Validate that prompt input variables are consistent.""" + memory_keys = values["memory"].memory_variables + input_key = values["input_key"] + if input_key in memory_keys: + raise ValueError( + f"The input key {input_key} was also found in the memory keys " + f"({memory_keys}) - please provide keys that don't overlap." + ) + prompt_variables = values["prompt"].input_variables + expected_keys = memory_keys + [input_key] + if set(expected_keys) != set(prompt_variables): + raise ValueError( + "Got unexpected prompt input variables. The prompt expects " + f"{prompt_variables}, but got {memory_keys} as inputs from " + f"memory, and {input_key} as the normal input key." + ) + return values diff --git a/langchain/chains/conversation/memory.py b/langchain/chains/conversation/memory.py new file mode 100644 index 0000000000000000000000000000000000000000..7aad58f8cc59767c8a991a129aaaa5401348b394 --- /dev/null +++ b/langchain/chains/conversation/memory.py @@ -0,0 +1,25 @@ +"""Memory modules for conversation prompts.""" + +from langchain.memory.buffer import ( + ConversationBufferMemory, + ConversationStringBufferMemory, +) +from langchain.memory.buffer_window import ConversationBufferWindowMemory +from langchain.memory.combined import CombinedMemory +from langchain.memory.entity import ConversationEntityMemory +from langchain.memory.kg import ConversationKGMemory +from langchain.memory.summary import ConversationSummaryMemory +from langchain.memory.summary_buffer import ConversationSummaryBufferMemory + +# This is only for backwards compatibility. 
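A usage sketch for `ConversationChain` with a windowed memory (not part of the diff; assumes an `OPENAI_API_KEY`):

.. code-block:: python

    from langchain import ConversationChain, OpenAI
    from langchain.memory.buffer_window import ConversationBufferWindowMemory

    conversation = ConversationChain(
        llm=OpenAI(temperature=0),
        memory=ConversationBufferWindowMemory(k=2),  # keep only the last two turns
        verbose=True,
    )
    conversation.predict(input="Hi, my name is Ada.")
    # The buffered history is injected into the {history} prompt variable on the next turn.
    print(conversation.predict(input="What is my name?"))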
+ +__all__ = [ + "ConversationSummaryBufferMemory", + "ConversationSummaryMemory", + "ConversationKGMemory", + "ConversationBufferWindowMemory", + "ConversationEntityMemory", + "ConversationBufferMemory", + "CombinedMemory", + "ConversationStringBufferMemory", +] diff --git a/langchain/chains/conversation/prompt.py b/langchain/chains/conversation/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..31a2b3d0668dba02f100030ccb1aaa2bde236fde --- /dev/null +++ b/langchain/chains/conversation/prompt.py @@ -0,0 +1,30 @@ +# flake8: noqa +from langchain.memory.prompt import ( + ENTITY_EXTRACTION_PROMPT, + ENTITY_MEMORY_CONVERSATION_TEMPLATE, + ENTITY_SUMMARIZATION_PROMPT, + KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT, + SUMMARY_PROMPT, +) +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. + +Current conversation: +{history} +Human: {input} +AI:""" +PROMPT = PromptTemplate( + input_variables=["history", "input"], template=_DEFAULT_TEMPLATE +) + +# Only for backwards compatibility + +__all__ = [ + "SUMMARY_PROMPT", + "ENTITY_MEMORY_CONVERSATION_TEMPLATE", + "ENTITY_SUMMARIZATION_PROMPT", + "ENTITY_EXTRACTION_PROMPT", + "KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT", + "PROMPT", +] diff --git a/langchain/chains/conversational_retrieval/__init__.py b/langchain/chains/conversational_retrieval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3522b876d8c1827ecccebe55e1a93b4497e269eb --- /dev/null +++ b/langchain/chains/conversational_retrieval/__init__.py @@ -0,0 +1 @@ +"""Chain for chatting with a vector database.""" diff --git a/langchain/chains/conversational_retrieval/__pycache__/__init__.cpython-39.pyc b/langchain/chains/conversational_retrieval/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b2447030f9f6b4c0ebeadd92f50d6bfacdbd5b3 Binary files /dev/null and b/langchain/chains/conversational_retrieval/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/conversational_retrieval/__pycache__/base.cpython-39.pyc b/langchain/chains/conversational_retrieval/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92bd69d1222bb5605b9fcaf9db809c6dc827a3e8 Binary files /dev/null and b/langchain/chains/conversational_retrieval/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/conversational_retrieval/__pycache__/prompts.cpython-39.pyc b/langchain/chains/conversational_retrieval/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd28d01ac6c2244617985e55bdd624d24bb40e6d Binary files /dev/null and b/langchain/chains/conversational_retrieval/__pycache__/prompts.cpython-39.pyc differ diff --git a/langchain/chains/conversational_retrieval/base.py b/langchain/chains/conversational_retrieval/base.py new file mode 100644 index 0000000000000000000000000000000000000000..838c0af54ece93a5dcf9913abe980f35414e6936 --- /dev/null +++ b/langchain/chains/conversational_retrieval/base.py @@ -0,0 +1,187 @@ +"""Chain for chatting with a vector database.""" +from __future__ import annotations + +from abc import abstractmethod +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from pydantic import 
BaseModel, Extra, Field + +from langchain.chains.base import Chain +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT +from langchain.chains.llm import LLMChain +from langchain.chains.question_answering import load_qa_chain +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel, BaseRetriever, Document +from langchain.vectorstores.base import VectorStore + + +def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str: + buffer = "" + for human_s, ai_s in chat_history: + human = "Human: " + human_s + ai = "Assistant: " + ai_s + buffer += "\n" + "\n".join([human, ai]) + return buffer + + +class BaseConversationalRetrievalChain(Chain, BaseModel): + """Chain for chatting with an index.""" + + combine_docs_chain: BaseCombineDocumentsChain + question_generator: LLMChain + output_key: str = "answer" + return_source_documents: bool = False + get_chat_history: Optional[Callable[[Tuple[str, str]], str]] = None + """Return the source documents.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + allow_population_by_field_name = True + + @property + def input_keys(self) -> List[str]: + """Input keys.""" + return ["question", "chat_history"] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. + + :meta private: + """ + _output_keys = [self.output_key] + if self.return_source_documents: + _output_keys = _output_keys + ["source_documents"] + return _output_keys + + @abstractmethod + def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]: + """Get docs.""" + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + question = inputs["question"] + get_chat_history = self.get_chat_history or _get_chat_history + chat_history_str = get_chat_history(inputs["chat_history"]) + + if chat_history_str: + new_question = self.question_generator.run( + question=question, chat_history=chat_history_str + ) + else: + new_question = question + docs = self._get_docs(new_question, inputs) + new_inputs = inputs.copy() + new_inputs["question"] = new_question + new_inputs["chat_history"] = chat_history_str + answer, _ = self.combine_docs_chain.combine_docs(docs, **new_inputs) + if self.return_source_documents: + return {self.output_key: answer, "source_documents": docs} + else: + return {self.output_key: answer} + + async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + question = inputs["question"] + get_chat_history = self.get_chat_history or _get_chat_history + chat_history_str = get_chat_history(inputs["chat_history"]) + if chat_history_str: + new_question = await self.question_generator.arun( + question=question, chat_history=chat_history_str + ) + else: + new_question = question + # TODO: This blocks the event loop, but it's not clear how to avoid it. 
+ docs = self._get_docs(new_question, inputs) + new_inputs = inputs.copy() + new_inputs["question"] = new_question + new_inputs["chat_history"] = chat_history_str + answer, _ = await self.combine_docs_chain.acombine_docs(docs, **new_inputs) + if self.return_source_documents: + return {self.output_key: answer, "source_documents": docs} + else: + return {self.output_key: answer} + + def save(self, file_path: Union[Path, str]) -> None: + if self.get_chat_history: + raise ValueError("Chain not savable when `get_chat_history` is not None.") + super().save(file_path) + + +class ConversationalRetrievalChain(BaseConversationalRetrievalChain, BaseModel): + """Chain for chatting with an index.""" + + retriever: BaseRetriever + + def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]: + return self.retriever.get_relevant_texts(question) + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + retriever: BaseRetriever, + condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT, + qa_prompt: Optional[BasePromptTemplate] = None, + chain_type: str = "stuff", + **kwargs: Any, + ) -> BaseConversationalRetrievalChain: + """Load chain from LLM.""" + doc_chain = load_qa_chain( + llm, + chain_type=chain_type, + prompt=qa_prompt, + ) + condense_question_chain = LLMChain(llm=llm, prompt=condense_question_prompt) + return cls( + retriever=retriever, + combine_docs_chain=doc_chain, + question_generator=condense_question_chain, + **kwargs, + ) + + +class ChatVectorDBChain(BaseConversationalRetrievalChain, BaseModel): + """Chain for chatting with a vector database.""" + + vectorstore: VectorStore = Field(alias="vectorstore") + top_k_docs_for_context: int = 4 + search_kwargs: dict = Field(default_factory=dict) + + @property + def _chain_type(self) -> str: + return "chat-vector-db" + + def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]: + vectordbkwargs = inputs.get("vectordbkwargs", {}) + full_kwargs = {**self.search_kwargs, **vectordbkwargs} + return self.vectorstore.similarity_search( + question, k=self.top_k_docs_for_context, **full_kwargs + ) + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + vectorstore: VectorStore, + condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT, + qa_prompt: Optional[BasePromptTemplate] = None, + chain_type: str = "stuff", + **kwargs: Any, + ) -> BaseConversationalRetrievalChain: + """Load chain from LLM.""" + doc_chain = load_qa_chain( + llm, + chain_type=chain_type, + prompt=qa_prompt, + ) + condense_question_chain = LLMChain(llm=llm, prompt=condense_question_prompt) + return cls( + vectorstore=vectorstore, + combine_docs_chain=doc_chain, + question_generator=condense_question_chain, + **kwargs, + ) diff --git a/langchain/chains/conversational_retrieval/prompts.py b/langchain/chains/conversational_retrieval/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..b2a2df09e3f293eab2818fbf1ce113a5eecdcec5 --- /dev/null +++ b/langchain/chains/conversational_retrieval/prompts.py @@ -0,0 +1,20 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. + +Chat History: +{chat_history} +Follow Up Input: {question} +Standalone question:""" +CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template) + +prompt_template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. + +{context} + +Question: {question} +Helpful Answer:""" +QA_PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +) diff --git a/langchain/chains/graph_qa/__init__.py b/langchain/chains/graph_qa/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f3bc55efbca8efd011ea4a5aa5fbd25bb5b4c457 --- /dev/null +++ b/langchain/chains/graph_qa/__init__.py @@ -0,0 +1 @@ +"""Question answering over a knowledge graph.""" diff --git a/langchain/chains/graph_qa/__pycache__/__init__.cpython-39.pyc b/langchain/chains/graph_qa/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9acc406ac725567f3198b13f13da6f0bb915c14c Binary files /dev/null and b/langchain/chains/graph_qa/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/graph_qa/__pycache__/base.cpython-39.pyc b/langchain/chains/graph_qa/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0dafe5b03932106adc90f4c5aeb704b060bcf0ec Binary files /dev/null and b/langchain/chains/graph_qa/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/graph_qa/__pycache__/prompts.cpython-39.pyc b/langchain/chains/graph_qa/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b96a3b0c39352c3720b675938571deef45c9fe0d Binary files /dev/null and b/langchain/chains/graph_qa/__pycache__/prompts.cpython-39.pyc differ diff --git a/langchain/chains/graph_qa/base.py b/langchain/chains/graph_qa/base.py new file mode 100644 index 0000000000000000000000000000000000000000..addf72f821bf473b8fba7f7cc1330c7cfd6f09ff --- /dev/null +++ b/langchain/chains/graph_qa/base.py @@ -0,0 +1,78 @@ +"""Question answering over a graph.""" +from __future__ import annotations + +from typing import Any, Dict, List + +from pydantic import Field + +from langchain.chains.base import Chain +from langchain.chains.graph_qa.prompts import ENTITY_EXTRACTION_PROMPT, PROMPT +from langchain.chains.llm import LLMChain +from langchain.graphs.networkx_graph import NetworkxEntityGraph, get_entities +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate + + +class GraphQAChain(Chain): + """Chain for question-answering against a graph.""" + + graph: NetworkxEntityGraph = Field(exclude=True) + entity_extraction_chain: LLMChain + qa_chain: LLMChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. 
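Putting the conversational retrieval pieces from earlier in this section together, a minimal usage sketch (not a definitive recipe) might look like the following; it assumes an OpenAI API key is configured and uses `retriever` as a placeholder for an existing retriever.

```python
from langchain import OpenAI
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain

# `retriever` is a placeholder for any existing langchain.schema.BaseRetriever.
# Note that _get_docs above calls retriever.get_relevant_texts(question), so the
# object supplied here must expose that method.
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), retriever=retriever)

chat_history = []
first = qa({"question": "What does this project do?", "chat_history": chat_history})
chat_history.append(("What does this project do?", first["answer"]))

followup = qa({"question": "How do I install it?", "chat_history": chat_history})
print(followup["answer"])
```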
+ + :meta private: + """ + _output_keys = [self.output_key] + return _output_keys + + @classmethod + def from_llm( + cls, + llm: BaseLLM, + qa_prompt: BasePromptTemplate = PROMPT, + entity_prompt: BasePromptTemplate = ENTITY_EXTRACTION_PROMPT, + **kwargs: Any, + ) -> GraphQAChain: + """Initialize from LLM.""" + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + entity_chain = LLMChain(llm=llm, prompt=entity_prompt) + + return cls(qa_chain=qa_chain, entity_extraction_chain=entity_chain, **kwargs) + + def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]: + """Extract entities, look up info and answer question.""" + question = inputs[self.input_key] + + entity_string = self.entity_extraction_chain.run(question) + + self.callback_manager.on_text( + "Entities Extracted:", end="\n", verbose=self.verbose + ) + self.callback_manager.on_text( + entity_string, color="green", end="\n", verbose=self.verbose + ) + entities = get_entities(entity_string) + context = "" + for entity in entities: + triplets = self.graph.get_entity_knowledge(entity) + context += "\n".join(triplets) + self.callback_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + self.callback_manager.on_text( + context, color="green", end="\n", verbose=self.verbose + ) + result = self.qa_chain({"question": question, "context": context}) + return {self.output_key: result[self.qa_chain.output_key]} diff --git a/langchain/chains/graph_qa/prompts.py b/langchain/chains/graph_qa/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..6fdf524764f12e4e5b5475021ce5544c7cab295c --- /dev/null +++ b/langchain/chains/graph_qa/prompts.py @@ -0,0 +1,34 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_ENTITY_EXTRACTION_TEMPLATE = """Extract all entities from the following text. As a guideline, a proper noun is generally capitalized. You should definitely extract all names and places. + +Return the output as a single comma-separated list, or NONE if there is nothing of note to return. + +EXAMPLE +i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. +Output: Langchain +END OF EXAMPLE + +EXAMPLE +i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. I'm working with Sam. +Output: Langchain, Sam +END OF EXAMPLE + +Begin! + +{input} +Output:""" +ENTITY_EXTRACTION_PROMPT = PromptTemplate( + input_variables=["input"], template=_DEFAULT_ENTITY_EXTRACTION_TEMPLATE +) + +prompt_template = """Use the following knowledge triplets to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + +{context} + +Question: {question} +Helpful Answer:""" +PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +) diff --git a/langchain/chains/hyde/__init__.py b/langchain/chains/hyde/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..946d0ab116960dd627fdcd486ca688451e80306a --- /dev/null +++ b/langchain/chains/hyde/__init__.py @@ -0,0 +1,4 @@ +"""Hypothetical Document Embeddings. 
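A short, hedged sketch of driving the GraphQAChain defined just above. The `KnowledgeTriple` helper and the `add_triple` method are assumed to be available from `langchain.graphs.networkx_graph` (the module this chain already imports from), the triples are invented, and an OpenAI API key plus an installed networkx are assumed.

```python
from langchain import OpenAI
from langchain.chains.graph_qa.base import GraphQAChain
from langchain.graphs.networkx_graph import KnowledgeTriple, NetworkxEntityGraph

graph = NetworkxEntityGraph()
# Invented triples, just so the chain has something to look up.
graph.add_triple(KnowledgeTriple("LangChain", "is written in", "Python"))
graph.add_triple(KnowledgeTriple("LangChain", "was created by", "Harrison Chase"))

# Entities are extracted from the question, their triplets gathered as context,
# and the qa_chain answers from that context.
chain = GraphQAChain.from_llm(OpenAI(temperature=0), graph=graph, verbose=True)
print(chain.run("Who created LangChain?"))
```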
+ +https://arxiv.org/abs/2212.10496 +""" diff --git a/langchain/chains/hyde/__pycache__/__init__.cpython-39.pyc b/langchain/chains/hyde/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..113a92e4ea3f7393778c58d9e6fea911ab06fce2 Binary files /dev/null and b/langchain/chains/hyde/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/hyde/__pycache__/base.cpython-39.pyc b/langchain/chains/hyde/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95cc1564c442e442992af1f03ccd581cd06d6177 Binary files /dev/null and b/langchain/chains/hyde/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/hyde/__pycache__/prompts.cpython-39.pyc b/langchain/chains/hyde/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c5f5d399917d7c08cdf15ae45af4d2085429ea3 Binary files /dev/null and b/langchain/chains/hyde/__pycache__/prompts.cpython-39.pyc differ diff --git a/langchain/chains/hyde/base.py b/langchain/chains/hyde/base.py new file mode 100644 index 0000000000000000000000000000000000000000..29ee31de99a7fa27b3cabc7b1b9836c8c4b18584 --- /dev/null +++ b/langchain/chains/hyde/base.py @@ -0,0 +1,75 @@ +"""Hypothetical Document Embeddings. + +https://arxiv.org/abs/2212.10496 +""" +from __future__ import annotations + +from typing import Dict, List + +import numpy as np +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.hyde.prompts import PROMPT_MAP +from langchain.chains.llm import LLMChain +from langchain.embeddings.base import Embeddings +from langchain.llms.base import BaseLLM + + +class HypotheticalDocumentEmbedder(Chain, Embeddings, BaseModel): + """Generate hypothetical document for query, and then embed that. 
+ + Based on https://arxiv.org/abs/2212.10496 + """ + + base_embeddings: Embeddings + llm_chain: LLMChain + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Input keys for Hyde's LLM chain.""" + return self.llm_chain.input_keys + + @property + def output_keys(self) -> List[str]: + """Output keys for Hyde's LLM chain.""" + return self.llm_chain.output_keys + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Call the base embeddings.""" + return self.base_embeddings.embed_documents(texts) + + def combine_embeddings(self, embeddings: List[List[float]]) -> List[float]: + """Combine embeddings into final embeddings.""" + return list(np.array(embeddings).mean(axis=0)) + + def embed_query(self, text: str) -> List[float]: + """Generate a hypothetical document and embedded it.""" + var_name = self.llm_chain.input_keys[0] + result = self.llm_chain.generate([{var_name: text}]) + documents = [generation.text for generation in result.generations[0]] + embeddings = self.embed_documents(documents) + return self.combine_embeddings(embeddings) + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + """Call the internal llm chain.""" + return self.llm_chain._call(inputs) + + @classmethod + def from_llm( + cls, llm: BaseLLM, base_embeddings: Embeddings, prompt_key: str + ) -> HypotheticalDocumentEmbedder: + """Load and use LLMChain for a specific prompt key.""" + prompt = PROMPT_MAP[prompt_key] + llm_chain = LLMChain(llm=llm, prompt=prompt) + return cls(base_embeddings=base_embeddings, llm_chain=llm_chain) + + @property + def _chain_type(self) -> str: + return "hyde_chain" diff --git a/langchain/chains/hyde/prompts.py b/langchain/chains/hyde/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..746cce3a1db07c4cf4b8a33aaf51da561e0251d8 --- /dev/null +++ b/langchain/chains/hyde/prompts.py @@ -0,0 +1,47 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +web_search_template = """Please write a passage to answer the question +Question: {QUESTION} +Passage:""" +web_search = PromptTemplate(template=web_search_template, input_variables=["QUESTION"]) +sci_fact_template = """Please write a scientific paper passage to support/refute the claim +Claim: {Claim} +Passage:""" +sci_fact = PromptTemplate(template=sci_fact_template, input_variables=["Claim"]) +arguana_template = """Please write a counter argument for the passage +Passage: {PASSAGE} +Counter Argument:""" +arguana = PromptTemplate(template=arguana_template, input_variables=["PASSAGE"]) +trec_covid_template = """Please write a scientific paper passage to answer the question +Question: {QUESTION} +Passage:""" +trec_covid = PromptTemplate(template=trec_covid_template, input_variables=["QUESTION"]) +fiqa_template = """Please write a financial article passage to answer the question +Question: {QUESTION} +Passage:""" +fiqa = PromptTemplate(template=fiqa_template, input_variables=["QUESTION"]) +dbpedia_entity_template = """Please write a passage to answer the question. +Question: {QUESTION} +Passage:""" +dbpedia_entity = PromptTemplate( + template=dbpedia_entity_template, input_variables=["QUESTION"] +) +trec_news_template = """Please write a news passage about the topic. 
+Topic: {TOPIC} +Passage:""" +trec_news = PromptTemplate(template=trec_news_template, input_variables=["TOPIC"]) +mr_tydi_template = """Please write a passage in Swahili/Korean/Japanese/Bengali to answer the question in detail. +Question: {QUESTION} +Passage:""" +mr_tydi = PromptTemplate(template=mr_tydi_template, input_variables=["QUESTION"]) +PROMPT_MAP = { + "web_search": web_search, + "sci_fact": sci_fact, + "arguana": arguana, + "trec_covid": trec_covid, + "fiqa": fiqa, + "dbpedia_entity": dbpedia_entity, + "trec_news": trec_news, + "mr_tydi": mr_tydi, +} diff --git a/langchain/chains/llm.py b/langchain/chains/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..46b3cfb7ce2e4b1f586f58aed2a75d8172397973 --- /dev/null +++ b/langchain/chains/llm.py @@ -0,0 +1,209 @@ +"""Chain that just formats a prompt and calls an LLM.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.input import get_colored_text +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import BaseLanguageModel, LLMResult, PromptValue + + +class LLMChain(Chain, BaseModel): + """Chain to run queries against LLMs. + + Example: + .. code-block:: python + + from langchain import LLMChain, OpenAI, PromptTemplate + prompt_template = "Tell me a {adjective} joke" + prompt = PromptTemplate( + input_variables=["adjective"], template=prompt_template + ) + llm = LLMChain(llm=OpenAI(), prompt=prompt) + """ + + prompt: BasePromptTemplate + """Prompt object to use.""" + llm: BaseLanguageModel + output_key: str = "text" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Will be whatever keys the prompt expects. + + :meta private: + """ + return self.prompt.input_variables + + @property + def output_keys(self) -> List[str]: + """Will always return text key. + + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + return self.apply([inputs])[0] + + def generate(self, input_list: List[Dict[str, Any]]) -> LLMResult: + """Generate LLM result from inputs.""" + prompts, stop = self.prep_prompts(input_list) + return self.llm.generate_prompt(prompts, stop) + + async def agenerate(self, input_list: List[Dict[str, Any]]) -> LLMResult: + """Generate LLM result from inputs.""" + prompts, stop = await self.aprep_prompts(input_list) + return await self.llm.agenerate_prompt(prompts, stop) + + def prep_prompts( + self, input_list: List[Dict[str, Any]] + ) -> Tuple[List[PromptValue], Optional[List[str]]]: + """Prepare prompts from inputs.""" + stop = None + if "stop" in input_list[0]: + stop = input_list[0]["stop"] + prompts = [] + for inputs in input_list: + selected_inputs = {k: inputs[k] for k in self.prompt.input_variables} + prompt = self.prompt.format_prompt(**selected_inputs) + _colored_text = get_colored_text(prompt.to_string(), "green") + _text = "Prompt after formatting:\n" + _colored_text + self.callback_manager.on_text(_text, end="\n", verbose=self.verbose) + if "stop" in inputs and inputs["stop"] != stop: + raise ValueError( + "If `stop` is present in any inputs, should be present in all." 
+ ) + prompts.append(prompt) + return prompts, stop + + async def aprep_prompts( + self, input_list: List[Dict[str, Any]] + ) -> Tuple[List[PromptValue], Optional[List[str]]]: + """Prepare prompts from inputs.""" + stop = None + if "stop" in input_list[0]: + stop = input_list[0]["stop"] + prompts = [] + for inputs in input_list: + selected_inputs = {k: inputs[k] for k in self.prompt.input_variables} + prompt = self.prompt.format_prompt(**selected_inputs) + _colored_text = get_colored_text(prompt.to_string(), "green") + _text = "Prompt after formatting:\n" + _colored_text + if self.callback_manager.is_async: + await self.callback_manager.on_text( + _text, end="\n", verbose=self.verbose + ) + else: + self.callback_manager.on_text(_text, end="\n", verbose=self.verbose) + if "stop" in inputs and inputs["stop"] != stop: + raise ValueError( + "If `stop` is present in any inputs, should be present in all." + ) + prompts.append(prompt) + return prompts, stop + + def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]: + """Utilize the LLM generate method for speed gains.""" + response = self.generate(input_list) + return self.create_outputs(response) + + async def aapply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]: + """Utilize the LLM generate method for speed gains.""" + response = await self.agenerate(input_list) + return self.create_outputs(response) + + def create_outputs(self, response: LLMResult) -> List[Dict[str, str]]: + """Create outputs from response.""" + return [ + # Get the text of the top generated string. + {self.output_key: generation[0].text} + for generation in response.generations + ] + + async def _acall(self, inputs: Dict[str, Any]) -> Dict[str, str]: + return (await self.aapply([inputs]))[0] + + def predict(self, **kwargs: Any) -> str: + """Format prompt with kwargs and pass to LLM. + + Args: + **kwargs: Keys to pass to prompt template. + + Returns: + Completion from LLM. + + Example: + .. code-block:: python + + completion = llm.predict(adjective="funny") + """ + return self(kwargs)[self.output_key] + + async def apredict(self, **kwargs: Any) -> str: + """Format prompt with kwargs and pass to LLM. + + Args: + **kwargs: Keys to pass to prompt template. + + Returns: + Completion from LLM. + + Example: + .. 
code-block:: python + + completion = llm.predict(adjective="funny") + """ + return (await self.acall(kwargs))[self.output_key] + + def predict_and_parse(self, **kwargs: Any) -> Union[str, List[str], Dict[str, str]]: + """Call predict and then parse the results.""" + result = self.predict(**kwargs) + if self.prompt.output_parser is not None: + return self.prompt.output_parser.parse(result) + else: + return result + + def apply_and_parse( + self, input_list: List[Dict[str, Any]] + ) -> Sequence[Union[str, List[str], Dict[str, str]]]: + """Call apply and then parse the results.""" + result = self.apply(input_list) + return self._parse_result(result) + + def _parse_result( + self, result: List[Dict[str, str]] + ) -> Sequence[Union[str, List[str], Dict[str, str]]]: + if self.prompt.output_parser is not None: + return [ + self.prompt.output_parser.parse(res[self.output_key]) for res in result + ] + else: + return result + + async def aapply_and_parse( + self, input_list: List[Dict[str, Any]] + ) -> Sequence[Union[str, List[str], Dict[str, str]]]: + """Call apply and then parse the results.""" + result = await self.aapply(input_list) + return self._parse_result(result) + + @property + def _chain_type(self) -> str: + return "llm_chain" + + @classmethod + def from_string(cls, llm: BaseLanguageModel, template: str) -> Chain: + """Create LLMChain from LLM and template.""" + prompt_template = PromptTemplate.from_template(template) + return cls(llm=llm, prompt=prompt_template) diff --git a/langchain/chains/llm_bash/__init__.py b/langchain/chains/llm_bash/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1e848a1a8fbfd67ff9b147c23716cfcde9bd550 --- /dev/null +++ b/langchain/chains/llm_bash/__init__.py @@ -0,0 +1 @@ +"""Chain that interprets a prompt and executes bash code to perform bash operations.""" diff --git a/langchain/chains/llm_bash/__pycache__/__init__.cpython-39.pyc b/langchain/chains/llm_bash/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..241b9964b999ae10095a9d363455be41e7d74e4c Binary files /dev/null and b/langchain/chains/llm_bash/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/llm_bash/__pycache__/base.cpython-39.pyc b/langchain/chains/llm_bash/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..979d03955cd0434f9863357699a73f0fd634530e Binary files /dev/null and b/langchain/chains/llm_bash/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/llm_bash/__pycache__/prompt.cpython-39.pyc b/langchain/chains/llm_bash/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d32e158e31a3e11ed9c0a35613a2f5bb38919699 Binary files /dev/null and b/langchain/chains/llm_bash/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/llm_bash/base.py b/langchain/chains/llm_bash/base.py new file mode 100644 index 0000000000000000000000000000000000000000..994df3021886ff48e5add5079d282732a3cbe9fd --- /dev/null +++ b/langchain/chains/llm_bash/base.py @@ -0,0 +1,79 @@ +"""Chain that interprets a prompt and executes bash code to perform bash operations.""" +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.llm_bash.prompt import PROMPT +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel +from 
langchain.utilities.bash import BashProcess + + +class LLMBashChain(Chain, BaseModel): + """Chain that interprets a prompt and executes bash code to perform bash operations. + + Example: + .. code-block:: python + + from langchain import LLMBashChain, OpenAI + llm_bash = LLMBashChain(llm=OpenAI()) + """ + + llm: BaseLanguageModel + """LLM wrapper to use.""" + input_key: str = "question" #: :meta private: + output_key: str = "answer" #: :meta private: + prompt: BasePromptTemplate = PROMPT + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Expect output key. + + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_executor = LLMChain(prompt=self.prompt, llm=self.llm) + bash_executor = BashProcess() + self.callback_manager.on_text(inputs[self.input_key], verbose=self.verbose) + + t = llm_executor.predict(question=inputs[self.input_key]) + self.callback_manager.on_text(t, color="green", verbose=self.verbose) + + t = t.strip() + if t.startswith("```bash"): + # Split the string into a list of substrings + command_list = t.split("\n") + print(command_list) + + # Remove the first and last substrings + command_list = [s for s in command_list[1:-1]] + output = bash_executor.run(command_list) + + self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose) + self.callback_manager.on_text(output, color="yellow", verbose=self.verbose) + + else: + raise ValueError(f"unknown format from LLM: {t}") + return {self.output_key: output} + + @property + def _chain_type(self) -> str: + return "llm_bash_chain" diff --git a/langchain/chains/llm_bash/prompt.py b/langchain/chains/llm_bash/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..27dcbe57aae63694267a6497e91243a472c5192f --- /dev/null +++ b/langchain/chains/llm_bash/prompt.py @@ -0,0 +1,22 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_PROMPT_TEMPLATE = """If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put "#!/bin/bash" in your answer. Make sure to reason step by step, using this format: + +Question: "copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'" + +I need to take the following actions: +- List all files in the directory +- Create a new directory +- Copy the files from the first directory into the second directory +```bash +ls +mkdir myNewDirectory +cp -r target/* myNewDirectory +``` + +That is the format. Begin! + +Question: {question}""" + +PROMPT = PromptTemplate(input_variables=["question"], template=_PROMPT_TEMPLATE) diff --git a/langchain/chains/llm_checker/__init__.py b/langchain/chains/llm_checker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..95516d81e6af873cc074668a93e12ec08be480e9 --- /dev/null +++ b/langchain/chains/llm_checker/__init__.py @@ -0,0 +1,4 @@ +"""Chain that tries to verify assumptions before answering a question. 
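A minimal usage sketch for the LLMBashChain defined just above, following its own docstring example; it assumes an OpenAI API key and, since the chain really executes the generated commands via BashProcess, it should only be run in a disposable environment.

```python
from langchain import LLMBashChain, OpenAI

llm = OpenAI(temperature=0)
bash_chain = LLMBashChain(llm=llm, verbose=True)

# The LLM is prompted to answer with a fenced bash block, which is then executed.
print(bash_chain.run("Please write a bash script that prints 'Hello World' to the console."))
```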
+ +Heavily borrowed from https://github.com/jagilley/fact-checker +""" diff --git a/langchain/chains/llm_checker/__pycache__/__init__.cpython-39.pyc b/langchain/chains/llm_checker/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3280bbdb9d92b4351e2fb3a4045ed6b8df83cc32 Binary files /dev/null and b/langchain/chains/llm_checker/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/llm_checker/__pycache__/base.cpython-39.pyc b/langchain/chains/llm_checker/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6036964719a026de62b5c84aace5898dc5f119e0 Binary files /dev/null and b/langchain/chains/llm_checker/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/llm_checker/__pycache__/prompt.cpython-39.pyc b/langchain/chains/llm_checker/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ae915def2d0e238109d61b29cb24701f752f9c9 Binary files /dev/null and b/langchain/chains/llm_checker/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/llm_checker/base.py b/langchain/chains/llm_checker/base.py new file mode 100644 index 0000000000000000000000000000000000000000..cd2f0eeca2b44e4a3691f40a16d7c1417008ce8f --- /dev/null +++ b/langchain/chains/llm_checker/base.py @@ -0,0 +1,103 @@ +"""Chain for question-answering with self-verification.""" + + +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.llm_checker.prompt import ( + CHECK_ASSERTIONS_PROMPT, + CREATE_DRAFT_ANSWER_PROMPT, + LIST_ASSERTIONS_PROMPT, + REVISED_ANSWER_PROMPT, +) +from langchain.chains.sequential import SequentialChain +from langchain.llms.base import BaseLLM +from langchain.prompts import PromptTemplate + + +class LLMCheckerChain(Chain, BaseModel): + """Chain for question-answering with self-verification. + + Example: + .. code-block:: python + + from langchain import OpenAI, LLMCheckerChain + llm = OpenAI(temperature=0.7) + checker_chain = LLMCheckerChain(llm=llm) + """ + + llm: BaseLLM + """LLM wrapper to use.""" + create_draft_answer_prompt: PromptTemplate = CREATE_DRAFT_ANSWER_PROMPT + list_assertions_prompt: PromptTemplate = LIST_ASSERTIONS_PROMPT + check_assertions_prompt: PromptTemplate = CHECK_ASSERTIONS_PROMPT + revised_answer_prompt: PromptTemplate = REVISED_ANSWER_PROMPT + """Prompt to use when questioning the documents.""" + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. 
+ + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + question = inputs[self.input_key] + + create_draft_answer_chain = LLMChain( + llm=self.llm, prompt=self.create_draft_answer_prompt, output_key="statement" + ) + list_assertions_chain = LLMChain( + llm=self.llm, prompt=self.list_assertions_prompt, output_key="assertions" + ) + check_assertions_chain = LLMChain( + llm=self.llm, + prompt=self.check_assertions_prompt, + output_key="checked_assertions", + ) + + revised_answer_chain = LLMChain( + llm=self.llm, + prompt=self.revised_answer_prompt, + output_key="revised_statement", + ) + + chains = [ + create_draft_answer_chain, + list_assertions_chain, + check_assertions_chain, + revised_answer_chain, + ] + + question_to_checked_assertions_chain = SequentialChain( + chains=chains, + input_variables=["question"], + output_variables=["revised_statement"], + verbose=True, + ) + output = question_to_checked_assertions_chain({"question": question}) + return {self.output_key: output["revised_statement"]} + + @property + def _chain_type(self) -> str: + return "llm_checker_chain" diff --git a/langchain/chains/llm_checker/prompt.py b/langchain/chains/llm_checker/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..73c883d0c204d225c14f5f6549f3f38e2a1383ef --- /dev/null +++ b/langchain/chains/llm_checker/prompt.py @@ -0,0 +1,31 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_CREATE_DRAFT_ANSWER_TEMPLATE = """{question}\n\n""" +CREATE_DRAFT_ANSWER_PROMPT = PromptTemplate( + input_variables=["question"], template=_CREATE_DRAFT_ANSWER_TEMPLATE +) + +_LIST_ASSERTIONS_TEMPLATE = """Here is a statement: +{statement} +Make a bullet point list of the assumptions you made when producing the above statement.\n\n""" +LIST_ASSERTIONS_PROMPT = PromptTemplate( + input_variables=["statement"], template=_LIST_ASSERTIONS_TEMPLATE +) + +_CHECK_ASSERTIONS_TEMPLATE = """Here is a bullet point list of assertions: +{assertions} +For each assertion, determine whether it is true or false. If it is false, explain why.\n\n""" +CHECK_ASSERTIONS_PROMPT = PromptTemplate( + input_variables=["assertions"], template=_CHECK_ASSERTIONS_TEMPLATE +) + +_REVISED_ANSWER_TEMPLATE = """{checked_assertions} + +Question: In light of the above assertions and checks, how would you answer the question '{question}'? + +Answer:""" +REVISED_ANSWER_PROMPT = PromptTemplate( + input_variables=["checked_assertions", "question"], + template=_REVISED_ANSWER_TEMPLATE, +) diff --git a/langchain/chains/llm_math/__init__.py b/langchain/chains/llm_math/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fa9fd272583bc1d2e8cc776464c2fbac8ea91ad6 --- /dev/null +++ b/langchain/chains/llm_math/__init__.py @@ -0,0 +1,4 @@ +"""Chain that interprets a prompt and executes python code to do math. 
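Continuing in the same spirit, a sketch of the LLMCheckerChain defined just above, adapted from its docstring (OpenAI API key assumed; the question is an arbitrary example):

```python
from langchain import OpenAI, LLMCheckerChain

llm = OpenAI(temperature=0.7)
checker_chain = LLMCheckerChain(llm=llm, verbose=True)

# Drafts an answer, lists the assumptions behind it, checks them, then revises the answer.
print(checker_chain.run("What type of mammal lays the biggest eggs?"))
```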
+ +Heavily borrowed from https://replit.com/@amasad/gptpy?v=1#main.py +""" diff --git a/langchain/chains/llm_math/__pycache__/__init__.cpython-39.pyc b/langchain/chains/llm_math/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0c2df5f34657bf04746902b8f24b75598907adf Binary files /dev/null and b/langchain/chains/llm_math/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/llm_math/__pycache__/base.cpython-39.pyc b/langchain/chains/llm_math/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01f9774d69dfa6afcb6073c9446b509ea3843bea Binary files /dev/null and b/langchain/chains/llm_math/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/llm_math/__pycache__/prompt.cpython-39.pyc b/langchain/chains/llm_math/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6261996fc13a72e3ad1f06ade1f5124e73e0752 Binary files /dev/null and b/langchain/chains/llm_math/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/llm_math/base.py b/langchain/chains/llm_math/base.py new file mode 100644 index 0000000000000000000000000000000000000000..3faebe3af2ffe0c81baffefcffd329af266bc9e9 --- /dev/null +++ b/langchain/chains/llm_math/base.py @@ -0,0 +1,91 @@ +"""Chain that interprets a prompt and executes python code to do math.""" +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.llm_math.prompt import PROMPT +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate +from langchain.python import PythonREPL + + +class LLMMathChain(Chain, BaseModel): + """Chain that interprets a prompt and executes python code to do math. + + Example: + .. code-block:: python + + from langchain import LLMMathChain, OpenAI + llm_math = LLMMathChain(llm=OpenAI()) + """ + + llm: BaseLLM + """LLM wrapper to use.""" + prompt: BasePromptTemplate = PROMPT + """Prompt to use to translate to python if neccessary.""" + input_key: str = "question" #: :meta private: + output_key: str = "answer" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Expect output key. 
+ + :meta private: + """ + return [self.output_key] + + def _process_llm_result(self, t: str) -> Dict[str, str]: + python_executor = PythonREPL() + self.callback_manager.on_text(t, color="green", verbose=self.verbose) + t = t.strip() + if t.startswith("```python"): + code = t[9:-4] + output = python_executor.run(code) + self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose) + self.callback_manager.on_text(output, color="yellow", verbose=self.verbose) + answer = "Answer: " + output + elif t.startswith("Answer:"): + answer = t + elif "Answer:" in t: + answer = "Answer: " + t.split("Answer:")[-1] + else: + raise ValueError(f"unknown format from LLM: {t}") + return {self.output_key: answer} + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_executor = LLMChain( + prompt=self.prompt, llm=self.llm, callback_manager=self.callback_manager + ) + self.callback_manager.on_text(inputs[self.input_key], verbose=self.verbose) + t = llm_executor.predict(question=inputs[self.input_key], stop=["```output"]) + return self._process_llm_result(t) + + async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_executor = LLMChain( + prompt=self.prompt, llm=self.llm, callback_manager=self.callback_manager + ) + self.callback_manager.on_text(inputs[self.input_key], verbose=self.verbose) + t = await llm_executor.apredict( + question=inputs[self.input_key], stop=["```output"] + ) + return self._process_llm_result(t) + + @property + def _chain_type(self) -> str: + return "llm_math_chain" diff --git a/langchain/chains/llm_math/prompt.py b/langchain/chains/llm_math/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3e78b475c3b224cfbd3f77e6c5c907ed599725cd --- /dev/null +++ b/langchain/chains/llm_math/prompt.py @@ -0,0 +1,30 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_PROMPT_TEMPLATE = """Translate a math problem into Python code that can be executed in Python 3 REPL. Use the output of running this code to answer the question. + +Question: ${{Question with math problem.}} +```python +${{Code that solves the problem and prints the solution}} +``` +```output +${{Output of running the code}} +``` +Answer: ${{Answer}} + +Begin. + +Question: What is 37593 * 67? 
+ +```python +print(37593 * 67) +``` +```output +2518731 +``` +Answer: 2518731 + +Question: {question} +""" + +PROMPT = PromptTemplate(input_variables=["question"], template=_PROMPT_TEMPLATE) diff --git a/langchain/chains/llm_requests.py b/langchain/chains/llm_requests.py new file mode 100644 index 0000000000000000000000000000000000000000..2374a42e886ae2a724f687ee2099c3d9dec0c2f8 --- /dev/null +++ b/langchain/chains/llm_requests.py @@ -0,0 +1,79 @@ +"""Chain that hits a URL and then uses an LLM to parse results.""" +from __future__ import annotations + +from typing import Dict, List + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains import LLMChain +from langchain.chains.base import Chain +from langchain.requests import RequestsWrapper + +DEFAULT_HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" # noqa: E501 +} + + +class LLMRequestsChain(Chain, BaseModel): + """Chain that hits a URL and then uses an LLM to parse results.""" + + llm_chain: LLMChain + requests_wrapper: RequestsWrapper = Field( + default_factory=RequestsWrapper, exclude=True + ) + text_length: int = 8000 + requests_key: str = "requests_result" #: :meta private: + input_key: str = "url" #: :meta private: + output_key: str = "output" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Will be whatever keys the prompt expects. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Will always return text key. + + :meta private: + """ + return [self.output_key] + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + try: + from bs4 import BeautifulSoup # noqa: F401 + + except ImportError: + raise ValueError( + "Could not import bs4 python package. " + "Please it install it with `pip install bs4`." + ) + return values + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + from bs4 import BeautifulSoup + + # Other keys are assumed to be needed for LLM prediction + other_keys = {k: v for k, v in inputs.items() if k != self.input_key} + url = inputs[self.input_key] + res = self.requests_wrapper.get(url) + # extract the text from the html + soup = BeautifulSoup(res, "html.parser") + other_keys[self.requests_key] = soup.get_text()[: self.text_length] + result = self.llm_chain.predict(**other_keys) + return {self.output_key: result} + + @property + def _chain_type(self) -> str: + return "llm_requests_chain" diff --git a/langchain/chains/llm_summarization_checker/__init__.py b/langchain/chains/llm_summarization_checker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..385990816100d79d9f24c7b48938196ed1214b4c --- /dev/null +++ b/langchain/chains/llm_summarization_checker/__init__.py @@ -0,0 +1,7 @@ +"""Summarization checker chain for verifying accuracy of text generation. + +Chain that tries to verify the accuracy of text generation by splitting it into a +list of facts, then checking if those facts are true or not, and rewriting +the text to make it more truth-ful. It will repeat this loop until it hits `max_tries` +or gets to a "true" output. 
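Stepping back to the LLMMathChain shown a little earlier, its usage follows the same pattern; a hedged sketch, again assuming an OpenAI API key:

```python
from langchain import LLMMathChain, OpenAI

llm_math = LLMMathChain(llm=OpenAI(temperature=0), verbose=True)

# The LLM is asked for a fenced python block, which is run in a Python REPL,
# and the printed output becomes the answer.
print(llm_math.run("What is 37593 times 67?"))
```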
+""" diff --git a/langchain/chains/llm_summarization_checker/__pycache__/__init__.cpython-39.pyc b/langchain/chains/llm_summarization_checker/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bad0089f2b7b28da4e18a21e6d53a6d25c018bf9 Binary files /dev/null and b/langchain/chains/llm_summarization_checker/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/llm_summarization_checker/__pycache__/base.cpython-39.pyc b/langchain/chains/llm_summarization_checker/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc871cfdb935772ee2213232690d01467a79035d Binary files /dev/null and b/langchain/chains/llm_summarization_checker/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/llm_summarization_checker/base.py b/langchain/chains/llm_summarization_checker/base.py new file mode 100644 index 0000000000000000000000000000000000000000..656b649fbc5cbcae9557072daedc57d617f8d510 --- /dev/null +++ b/langchain/chains/llm_summarization_checker/base.py @@ -0,0 +1,133 @@ +"""Chain for summarization with self-verification.""" + +from pathlib import Path +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.sequential import SequentialChain +from langchain.llms.base import BaseLLM +from langchain.prompts.prompt import PromptTemplate + +PROMPTS_DIR = Path(__file__).parent / "prompts" + +CREATE_ASSERTIONS_PROMPT = PromptTemplate.from_file( + PROMPTS_DIR / "create_facts.txt", ["summary"] +) +CHECK_ASSERTIONS_PROMPT = PromptTemplate.from_file( + PROMPTS_DIR / "check_facts.txt", ["assertions"] +) +REVISED_SUMMARY_PROMPT = PromptTemplate.from_file( + PROMPTS_DIR / "revise_summary.txt", ["checked_assertions", "summary"] +) +ARE_ALL_TRUE_PROMPT = PromptTemplate.from_file( + PROMPTS_DIR / "are_all_true_prompt.txt", ["checked_assertions"] +) + + +class LLMSummarizationCheckerChain(Chain, BaseModel): + """Chain for question-answering with self-verification. + + Example: + .. code-block:: python + + from langchain import OpenAI, LLMSummarizationCheckerChain + llm = OpenAI(temperature=0.0) + checker_chain = LLMSummarizationCheckerChain(llm=llm) + """ + + llm: BaseLLM + """LLM wrapper to use.""" + + create_assertions_prompt: PromptTemplate = CREATE_ASSERTIONS_PROMPT + check_assertions_prompt: PromptTemplate = CHECK_ASSERTIONS_PROMPT + revised_summary_prompt: PromptTemplate = REVISED_SUMMARY_PROMPT + are_all_true_prompt: PromptTemplate = ARE_ALL_TRUE_PROMPT + + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + max_checks: int = 2 + """Maximum number of times to check the assertions. Default to double-checking.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. 
+ + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + all_true = False + count = 0 + output = None + original_input = inputs[self.input_key] + chain_input = original_input + + while not all_true and count < self.max_checks: + chain = SequentialChain( + chains=[ + LLMChain( + llm=self.llm, + prompt=self.create_assertions_prompt, + output_key="assertions", + verbose=self.verbose, + ), + LLMChain( + llm=self.llm, + prompt=self.check_assertions_prompt, + output_key="checked_assertions", + verbose=self.verbose, + ), + LLMChain( + llm=self.llm, + prompt=self.revised_summary_prompt, + output_key="revised_summary", + verbose=self.verbose, + ), + LLMChain( + llm=self.llm, + output_key="all_true", + prompt=self.are_all_true_prompt, + verbose=self.verbose, + ), + ], + input_variables=["summary"], + output_variables=["all_true", "revised_summary"], + verbose=self.verbose, + ) + output = chain({"summary": chain_input}) + count += 1 + + if output["all_true"].strip() == "True": + break + + if self.verbose: + print(output["revised_summary"]) + + chain_input = output["revised_summary"] + + if not output: + raise ValueError("No output from chain") + + return {self.output_key: output["revised_summary"].strip()} + + @property + def _chain_type(self) -> str: + return "llm_summarization_checker_chain" diff --git a/langchain/chains/llm_summarization_checker/prompts/are_all_true_prompt.txt b/langchain/chains/llm_summarization_checker/prompts/are_all_true_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb1bedabfdf4bf3a26577a1be7d36e46827fb98a --- /dev/null +++ b/langchain/chains/llm_summarization_checker/prompts/are_all_true_prompt.txt @@ -0,0 +1,38 @@ +Below are some assertions that have been fact checked and are labeled as true or false. + +If all of the assertions are true, return "True". If any of the assertions are false, return "False". + +Here are some examples: +=== + +Checked Assertions: """ +- The sky is red: False +- Water is made of lava: False +- The sun is a star: True +""" +Result: False + +=== + +Checked Assertions: """ +- The sky is blue: True +- Water is wet: True +- The sun is a star: True +""" +Result: True + +=== + +Checked Assertions: """ +- The sky is blue - True +- Water is made of lava- False +- The sun is a star - True +""" +Result: False + +=== + +Checked Assertions:""" +{checked_assertions} +""" +Result: \ No newline at end of file diff --git a/langchain/chains/llm_summarization_checker/prompts/check_facts.txt b/langchain/chains/llm_summarization_checker/prompts/check_facts.txt new file mode 100644 index 0000000000000000000000000000000000000000..b675d31823bf167a8a0d4c826cccb2c103996384 --- /dev/null +++ b/langchain/chains/llm_summarization_checker/prompts/check_facts.txt @@ -0,0 +1,10 @@ +You are an expert fact checker. You have been hired by a major news organization to fact check a very important story. + +Here is a bullet point list of facts: +""" +{assertions} +""" + +For each fact, determine whether it is true or false about the subject. If you are unable to determine whether the fact is true or false, output "Undetermined". +If the fact is false, explain why. 
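Tying the prompt files above back to the LLMSummarizationCheckerChain class, a hedged usage sketch (OpenAI API key assumed; the summary text is invented and deliberately contains one dubious claim for the checker to catch):

```python
from langchain import OpenAI, LLMSummarizationCheckerChain

llm = OpenAI(temperature=0.0)
checker_chain = LLMSummarizationCheckerChain(llm=llm, verbose=True, max_checks=2)

text = (
    "The Greenland shark is the longest-living vertebrate, with an estimated "
    "lifespan of several hundred years. It is also the fastest fish in the ocean."
)
# Extracts assertions from the text, fact-checks them, and rewrites the summary
# until all assertions check out or max_checks is reached.
print(checker_chain.run(text))
```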
+ diff --git a/langchain/chains/llm_summarization_checker/prompts/create_facts.txt b/langchain/chains/llm_summarization_checker/prompts/create_facts.txt new file mode 100644 index 0000000000000000000000000000000000000000..e85079a1d0f8a74436e68f204fc01caeacbeeb89 --- /dev/null +++ b/langchain/chains/llm_summarization_checker/prompts/create_facts.txt @@ -0,0 +1,10 @@ +Given some text, extract a list of facts from the text. + +Format your output as a bulleted list. + +Text: +""" +{summary} +""" + +Facts: \ No newline at end of file diff --git a/langchain/chains/llm_summarization_checker/prompts/revise_summary.txt b/langchain/chains/llm_summarization_checker/prompts/revise_summary.txt new file mode 100644 index 0000000000000000000000000000000000000000..70f0d2fe10f7b26c9eaf802a758215036119dfde --- /dev/null +++ b/langchain/chains/llm_summarization_checker/prompts/revise_summary.txt @@ -0,0 +1,17 @@ +Below are some assertions that have been fact checked and are labeled as true of false. If the answer is false, a suggestion is given for a correction. + +Checked Assertions: +""" +{checked_assertions} +""" + +Original Summary: +""" +{summary} +""" + +Using these checked assertions, rewrite the original summary to be completely true. + +The output should have the same structure and formatting as the original summary. + +Summary: \ No newline at end of file diff --git a/langchain/chains/loading.py b/langchain/chains/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..5b9b78f9cff84e7f0a054784afdd40b9a19b9397 --- /dev/null +++ b/langchain/chains/loading.py @@ -0,0 +1,474 @@ +"""Functionality for loading chains.""" +import json +from pathlib import Path +from typing import Any, Union + +import yaml + +from langchain.chains.api.base import APIChain +from langchain.chains.base import Chain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain +from langchain.chains.combine_documents.refine import RefineDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.hyde.base import HypotheticalDocumentEmbedder +from langchain.chains.llm import LLMChain +from langchain.chains.llm_bash.base import LLMBashChain +from langchain.chains.llm_checker.base import LLMCheckerChain +from langchain.chains.llm_math.base import LLMMathChain +from langchain.chains.llm_requests import LLMRequestsChain +from langchain.chains.pal.base import PALChain +from langchain.chains.qa_with_sources.base import QAWithSourcesChain +from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain +from langchain.chains.retrieval_qa.base import VectorDBQA +from langchain.chains.sql_database.base import SQLDatabaseChain +from langchain.llms.loading import load_llm, load_llm_from_config +from langchain.prompts.loading import load_prompt, load_prompt_from_config +from langchain.utilities.loading import try_load_from_hub + +URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/chains/" + + +def _load_llm_chain(config: dict, **kwargs: Any) -> LLMChain: + """Load LLM chain from config dict.""" + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + + if "prompt" in config: + prompt_config = config.pop("prompt") + prompt = 
load_prompt_from_config(prompt_config) + elif "prompt_path" in config: + prompt = load_prompt(config.pop("prompt_path")) + else: + raise ValueError("One of `prompt` or `prompt_path` must be present.") + + return LLMChain(llm=llm, prompt=prompt, **config) + + +def _load_hyde_chain(config: dict, **kwargs: Any) -> HypotheticalDocumentEmbedder: + """Load hypothetical document embedder chain from config dict.""" + if "llm_chain" in config: + llm_chain_config = config.pop("llm_chain") + llm_chain = load_chain_from_config(llm_chain_config) + elif "llm_chain_path" in config: + llm_chain = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` or `llm_chain_path` must be present.") + if "embeddings" in kwargs: + embeddings = kwargs.pop("embeddings") + else: + raise ValueError("`embeddings` must be present.") + return HypotheticalDocumentEmbedder( + llm_chain=llm_chain, base_embeddings=embeddings, **config + ) + + +def _load_stuff_documents_chain(config: dict, **kwargs: Any) -> StuffDocumentsChain: + if "llm_chain" in config: + llm_chain_config = config.pop("llm_chain") + llm_chain = load_chain_from_config(llm_chain_config) + elif "llm_chain_path" in config: + llm_chain = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` or `llm_chain_config` must be present.") + + if not isinstance(llm_chain, LLMChain): + raise ValueError(f"Expected LLMChain, got {llm_chain}") + + if "document_prompt" in config: + prompt_config = config.pop("document_prompt") + document_prompt = load_prompt_from_config(prompt_config) + elif "document_prompt_path" in config: + document_prompt = load_prompt(config.pop("document_prompt_path")) + else: + raise ValueError( + "One of `document_prompt` or `document_prompt_path` must be present." + ) + + return StuffDocumentsChain( + llm_chain=llm_chain, document_prompt=document_prompt, **config + ) + + +def _load_map_reduce_documents_chain( + config: dict, **kwargs: Any +) -> MapReduceDocumentsChain: + if "llm_chain" in config: + llm_chain_config = config.pop("llm_chain") + llm_chain = load_chain_from_config(llm_chain_config) + elif "llm_chain_path" in config: + llm_chain = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` or `llm_chain_config` must be present.") + + if not isinstance(llm_chain, LLMChain): + raise ValueError(f"Expected LLMChain, got {llm_chain}") + + if "combine_document_chain" in config: + combine_document_chain_config = config.pop("combine_document_chain") + combine_document_chain = load_chain_from_config(combine_document_chain_config) + elif "combine_document_chain_path" in config: + combine_document_chain = load_chain(config.pop("combine_document_chain_path")) + else: + raise ValueError( + "One of `combine_document_chain` or " + "`combine_document_chain_path` must be present." 
+ ) + if "collapse_document_chain" in config: + collapse_document_chain_config = config.pop("collapse_document_chain") + if collapse_document_chain_config is None: + collapse_document_chain = None + else: + collapse_document_chain = load_chain_from_config( + collapse_document_chain_config + ) + elif "collapse_document_chain_path" in config: + collapse_document_chain = load_chain(config.pop("collapse_document_chain_path")) + return MapReduceDocumentsChain( + llm_chain=llm_chain, + combine_document_chain=combine_document_chain, + collapse_document_chain=collapse_document_chain, + **config, + ) + + +def _load_llm_bash_chain(config: dict, **kwargs: Any) -> LLMBashChain: + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + if "prompt" in config: + prompt_config = config.pop("prompt") + prompt = load_prompt_from_config(prompt_config) + elif "prompt_path" in config: + prompt = load_prompt(config.pop("prompt_path")) + return LLMBashChain(llm=llm, prompt=prompt, **config) + + +def _load_llm_checker_chain(config: dict, **kwargs: Any) -> LLMCheckerChain: + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + if "create_draft_answer_prompt" in config: + create_draft_answer_prompt_config = config.pop("create_draft_answer_prompt") + create_draft_answer_prompt = load_prompt_from_config( + create_draft_answer_prompt_config + ) + elif "create_draft_answer_prompt_path" in config: + create_draft_answer_prompt = load_prompt( + config.pop("create_draft_answer_prompt_path") + ) + if "list_assertions_prompt" in config: + list_assertions_prompt_config = config.pop("list_assertions_prompt") + list_assertions_prompt = load_prompt_from_config(list_assertions_prompt_config) + elif "list_assertions_prompt_path" in config: + list_assertions_prompt = load_prompt(config.pop("list_assertions_prompt_path")) + if "check_assertions_prompt" in config: + check_assertions_prompt_config = config.pop("check_assertions_prompt") + check_assertions_prompt = load_prompt_from_config( + check_assertions_prompt_config + ) + elif "check_assertions_prompt_path" in config: + check_assertions_prompt = load_prompt( + config.pop("check_assertions_prompt_path") + ) + if "revised_answer_prompt" in config: + revised_answer_prompt_config = config.pop("revised_answer_prompt") + revised_answer_prompt = load_prompt_from_config(revised_answer_prompt_config) + elif "revised_answer_prompt_path" in config: + revised_answer_prompt = load_prompt(config.pop("revised_answer_prompt_path")) + return LLMCheckerChain( + llm=llm, + create_draft_answer_prompt=create_draft_answer_prompt, + list_assertions_prompt=list_assertions_prompt, + check_assertions_prompt=check_assertions_prompt, + revised_answer_prompt=revised_answer_prompt, + **config, + ) + + +def _load_llm_math_chain(config: dict, **kwargs: Any) -> LLMMathChain: + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + if "prompt" in config: + prompt_config = config.pop("prompt") + prompt = load_prompt_from_config(prompt_config) + elif "prompt_path" in 
config: + prompt = load_prompt(config.pop("prompt_path")) + return LLMMathChain(llm=llm, prompt=prompt, **config) + + +def _load_map_rerank_documents_chain( + config: dict, **kwargs: Any +) -> MapRerankDocumentsChain: + if "llm_chain" in config: + llm_chain_config = config.pop("llm_chain") + llm_chain = load_chain_from_config(llm_chain_config) + elif "llm_chain_path" in config: + llm_chain = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` or `llm_chain_config` must be present.") + return MapRerankDocumentsChain(llm_chain=llm_chain, **config) + + +def _load_pal_chain(config: dict, **kwargs: Any) -> PALChain: + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + if "prompt" in config: + prompt_config = config.pop("prompt") + prompt = load_prompt_from_config(prompt_config) + elif "prompt_path" in config: + prompt = load_prompt(config.pop("prompt_path")) + else: + raise ValueError("One of `prompt` or `prompt_path` must be present.") + return PALChain(llm=llm, prompt=prompt, **config) + + +def _load_refine_documents_chain(config: dict, **kwargs: Any) -> RefineDocumentsChain: + if "initial_llm_chain" in config: + initial_llm_chain_config = config.pop("initial_llm_chain") + initial_llm_chain = load_chain_from_config(initial_llm_chain_config) + elif "initial_llm_chain_path" in config: + initial_llm_chain = load_chain(config.pop("initial_llm_chain_path")) + else: + raise ValueError( + "One of `initial_llm_chain` or `initial_llm_chain_config` must be present." + ) + if "refine_llm_chain" in config: + refine_llm_chain_config = config.pop("refine_llm_chain") + refine_llm_chain = load_chain_from_config(refine_llm_chain_config) + elif "refine_llm_chain_path" in config: + refine_llm_chain = load_chain(config.pop("refine_llm_chain_path")) + else: + raise ValueError( + "One of `refine_llm_chain` or `refine_llm_chain_config` must be present." + ) + if "document_prompt" in config: + prompt_config = config.pop("document_prompt") + document_prompt = load_prompt_from_config(prompt_config) + elif "document_prompt_path" in config: + document_prompt = load_prompt(config.pop("document_prompt_path")) + return RefineDocumentsChain( + initial_llm_chain=initial_llm_chain, + refine_llm_chain=refine_llm_chain, + document_prompt=document_prompt, + **config, + ) + + +def _load_qa_with_sources_chain(config: dict, **kwargs: Any) -> QAWithSourcesChain: + if "combine_documents_chain" in config: + combine_documents_chain_config = config.pop("combine_documents_chain") + combine_documents_chain = load_chain_from_config(combine_documents_chain_config) + elif "combine_documents_chain_path" in config: + combine_documents_chain = load_chain(config.pop("combine_documents_chain_path")) + else: + raise ValueError( + "One of `combine_documents_chain` or " + "`combine_documents_chain_path` must be present." 
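Each loader also accepts a `*_path` variant so nested components (prompts, sub-chains) can live in their own files. A hedged sketch of calling `load_chain_from_config` directly with an in-memory dict; the prompt path and LLM settings are illustrative, not a documented schema:

.. code-block:: python

    from langchain.chains.loading import load_chain_from_config

    config = {
        "_type": "llm_chain",
        "llm": {"_type": "openai", "temperature": 0},   # resolved by load_llm_from_config
        "prompt_path": "prompts/summarize.json",        # hypothetical file, read via load_prompt
    }
    chain = load_chain_from_config(config)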
+ ) + return QAWithSourcesChain(combine_documents_chain=combine_documents_chain, **config) + + +def _load_sql_database_chain(config: dict, **kwargs: Any) -> SQLDatabaseChain: + if "database" in kwargs: + database = kwargs.pop("database") + else: + raise ValueError("`database` must be present.") + if "llm" in config: + llm_config = config.pop("llm") + llm = load_llm_from_config(llm_config) + elif "llm_path" in config: + llm = load_llm(config.pop("llm_path")) + else: + raise ValueError("One of `llm` or `llm_path` must be present.") + if "prompt" in config: + prompt_config = config.pop("prompt") + prompt = load_prompt_from_config(prompt_config) + return SQLDatabaseChain(database=database, llm=llm, prompt=prompt, **config) + + +def _load_vector_db_qa_with_sources_chain( + config: dict, **kwargs: Any +) -> VectorDBQAWithSourcesChain: + if "vectorstore" in kwargs: + vectorstore = kwargs.pop("vectorstore") + else: + raise ValueError("`vectorstore` must be present.") + if "combine_documents_chain" in config: + combine_documents_chain_config = config.pop("combine_documents_chain") + combine_documents_chain = load_chain_from_config(combine_documents_chain_config) + elif "combine_documents_chain_path" in config: + combine_documents_chain = load_chain(config.pop("combine_documents_chain_path")) + else: + raise ValueError( + "One of `combine_documents_chain` or " + "`combine_documents_chain_path` must be present." + ) + return VectorDBQAWithSourcesChain( + combine_documents_chain=combine_documents_chain, + vectorstore=vectorstore, + **config, + ) + + +def _load_vector_db_qa(config: dict, **kwargs: Any) -> VectorDBQA: + if "vectorstore" in kwargs: + vectorstore = kwargs.pop("vectorstore") + else: + raise ValueError("`vectorstore` must be present.") + if "combine_documents_chain" in config: + combine_documents_chain_config = config.pop("combine_documents_chain") + combine_documents_chain = load_chain_from_config(combine_documents_chain_config) + elif "combine_documents_chain_path" in config: + combine_documents_chain = load_chain(config.pop("combine_documents_chain_path")) + else: + raise ValueError( + "One of `combine_documents_chain` or " + "`combine_documents_chain_path` must be present." + ) + return VectorDBQA( + combine_documents_chain=combine_documents_chain, + vectorstore=vectorstore, + **config, + ) + + +def _load_api_chain(config: dict, **kwargs: Any) -> APIChain: + if "api_request_chain" in config: + api_request_chain_config = config.pop("api_request_chain") + api_request_chain = load_chain_from_config(api_request_chain_config) + elif "api_request_chain_path" in config: + api_request_chain = load_chain(config.pop("api_request_chain_path")) + else: + raise ValueError( + "One of `api_request_chain` or `api_request_chain_path` must be present." + ) + if "api_answer_chain" in config: + api_answer_chain_config = config.pop("api_answer_chain") + api_answer_chain = load_chain_from_config(api_answer_chain_config) + elif "api_answer_chain_path" in config: + api_answer_chain = load_chain(config.pop("api_answer_chain_path")) + else: + raise ValueError( + "One of `api_answer_chain` or `api_answer_chain_path` must be present." 
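The vector-DB loaders below follow the same convention as the SQL and API loaders: the live resource (here a vectorstore) must come in through kwargs. A hedged sketch, assuming the `faiss` and `openai` packages are installed; the saved config path is illustrative:

.. code-block:: python

    from langchain.chains.loading import load_chain
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import FAISS

    vectorstore = FAISS.from_texts(
        ["LangChain composes LLM calls with retrieval."], OpenAIEmbeddings()
    )
    qa = load_chain("vector_db_qa.yaml", vectorstore=vectorstore)  # config file is illustrative
    print(qa.run("What does LangChain compose?"))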
+ ) + if "requests_wrapper" in kwargs: + requests_wrapper = kwargs.pop("requests_wrapper") + else: + raise ValueError("`requests_wrapper` must be present.") + return APIChain( + api_request_chain=api_request_chain, + api_answer_chain=api_answer_chain, + requests_wrapper=requests_wrapper, + **config, + ) + + +def _load_llm_requests_chain(config: dict, **kwargs: Any) -> LLMRequestsChain: + if "llm_chain" in config: + llm_chain_config = config.pop("llm_chain") + llm_chain = load_chain_from_config(llm_chain_config) + elif "llm_chain_path" in config: + llm_chain = load_chain(config.pop("llm_chain_path")) + else: + raise ValueError("One of `llm_chain` or `llm_chain_path` must be present.") + if "requests_wrapper" in kwargs: + requests_wrapper = kwargs.pop("requests_wrapper") + return LLMRequestsChain( + llm_chain=llm_chain, requests_wrapper=requests_wrapper, **config + ) + else: + return LLMRequestsChain(llm_chain=llm_chain, **config) + + +type_to_loader_dict = { + "api_chain": _load_api_chain, + "hyde_chain": _load_hyde_chain, + "llm_chain": _load_llm_chain, + "llm_bash_chain": _load_llm_bash_chain, + "llm_checker_chain": _load_llm_checker_chain, + "llm_math_chain": _load_llm_math_chain, + "llm_requests_chain": _load_llm_requests_chain, + "pal_chain": _load_pal_chain, + "qa_with_sources_chain": _load_qa_with_sources_chain, + "stuff_documents_chain": _load_stuff_documents_chain, + "map_reduce_documents_chain": _load_map_reduce_documents_chain, + "map_rerank_documents_chain": _load_map_rerank_documents_chain, + "refine_documents_chain": _load_refine_documents_chain, + "sql_database_chain": _load_sql_database_chain, + "vector_db_qa_with_sources_chain": _load_vector_db_qa_with_sources_chain, + "vector_db_qa": _load_vector_db_qa, +} + + +def load_chain_from_config(config: dict, **kwargs: Any) -> Chain: + """Load chain from Config Dict.""" + if "_type" not in config: + raise ValueError("Must specify a chain Type in config") + config_type = config.pop("_type") + + if config_type not in type_to_loader_dict: + raise ValueError(f"Loading {config_type} chain not supported") + + chain_loader = type_to_loader_dict[config_type] + return chain_loader(config, **kwargs) + + +def load_chain(path: Union[str, Path], **kwargs: Any) -> Chain: + """Unified method for loading a chain from LangChainHub or local fs.""" + if hub_result := try_load_from_hub( + path, _load_chain_from_file, "chains", {"json", "yaml"}, **kwargs + ): + return hub_result + else: + return _load_chain_from_file(path, **kwargs) + + +def _load_chain_from_file(file: Union[str, Path], **kwargs: Any) -> Chain: + """Load chain from file.""" + # Convert file to Path object. + if isinstance(file, str): + file_path = Path(file) + else: + file_path = file + # Load from either json or yaml. + if file_path.suffix == ".json": + with open(file_path) as f: + config = json.load(f) + elif file_path.suffix == ".yaml": + with open(file_path, "r") as f: + config = yaml.safe_load(f) + else: + raise ValueError("File type must be json or yaml") + + # Override default 'verbose' and 'memory' for the chain + if "verbose" in kwargs: + config["verbose"] = kwargs.pop("verbose") + if "memory" in kwargs: + config["memory"] = kwargs.pop("memory") + + # Load the chain from the config now. 
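Putting the pieces together, `load_chain` and `_load_chain_from_file` are the read half of a save/load round trip. A small sketch using the chain's own `save` method; the file name is illustrative and an OpenAI key is assumed:

.. code-block:: python

    from langchain.chains import LLMChain
    from langchain.chains.loading import load_chain
    from langchain.llms import OpenAI
    from langchain.prompts import PromptTemplate

    prompt = PromptTemplate(input_variables=["topic"], template="Tell me a joke about {topic}.")
    chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)
    chain.save("joke_chain.json")             # serializes with "_type": "llm_chain"

    reloaded = load_chain("joke_chain.json")  # dispatched to _load_llm_chain via the registry
    print(reloaded.run("bears"))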
+ return load_chain_from_config(config, **kwargs) diff --git a/langchain/chains/mapreduce.py b/langchain/chains/mapreduce.py new file mode 100644 index 0000000000000000000000000000000000000000..583e484badd15466471edb7844eef00b735fa408 --- /dev/null +++ b/langchain/chains/mapreduce.py @@ -0,0 +1,74 @@ +"""Map-reduce chain. + +Splits up a document, sends the smaller parts to the LLM with one prompt, +then combines the results with another one. +""" +from __future__ import annotations + +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate +from langchain.text_splitter import TextSplitter + + +class MapReduceChain(Chain, BaseModel): + """Map-reduce chain.""" + + combine_documents_chain: BaseCombineDocumentsChain + """Chain to use to combine documents.""" + text_splitter: TextSplitter + """Text splitter to use.""" + input_key: str = "input_text" #: :meta private: + output_key: str = "output_text" #: :meta private: + + @classmethod + def from_params( + cls, llm: BaseLLM, prompt: BasePromptTemplate, text_splitter: TextSplitter + ) -> MapReduceChain: + """Construct a map-reduce chain that uses the chain for map and reduce.""" + llm_chain = LLMChain(llm=llm, prompt=prompt) + reduce_chain = StuffDocumentsChain(llm_chain=llm_chain) + combine_documents_chain = MapReduceDocumentsChain( + llm_chain=llm_chain, combine_document_chain=reduce_chain + ) + return cls( + combine_documents_chain=combine_documents_chain, text_splitter=text_splitter + ) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. + + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + # Split the larger text into smaller chunks. + texts = self.text_splitter.split_text(inputs[self.input_key]) + docs = [Document(page_content=text) for text in texts] + outputs, _ = self.combine_documents_chain.combine_docs(docs) + return {self.output_key: outputs} diff --git a/langchain/chains/moderation.py b/langchain/chains/moderation.py new file mode 100644 index 0000000000000000000000000000000000000000..a288124c382ce6c8f48916dd726ed3b47e97b332 --- /dev/null +++ b/langchain/chains/moderation.py @@ -0,0 +1,82 @@ +"""Pass input through a moderation endpoint.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, root_validator + +from langchain.chains.base import Chain +from langchain.utils import get_from_dict_or_env + + +class OpenAIModerationChain(Chain, BaseModel): + """Pass input through a moderation endpoint. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. 
code-block:: python + + from langchain.chains import OpenAIModerationChain + moderation = OpenAIModerationChain() + """ + + client: Any #: :meta private: + model_name: Optional[str] = None + """Moderation model name to use.""" + error: bool = False + """Whether or not to error if bad content was found.""" + input_key: str = "input" #: :meta private: + output_key: str = "output" #: :meta private: + openai_api_key: Optional[str] = None + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + values["client"] = openai.Moderation + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + return values + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. + + :meta private: + """ + return [self.output_key] + + def _moderate(self, text: str, results: dict) -> str: + if results["flagged"]: + error_str = "Text was found that violates OpenAI's content policy." + if self.error: + raise ValueError(error_str) + else: + return error_str + return text + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + text = inputs[self.input_key] + results = self.client.create(text) + output = self._moderate(text, results["results"][0]) + return {self.output_key: output} diff --git a/langchain/chains/natbot/__init__.py b/langchain/chains/natbot/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..45a2231ae51d147993c758754ead0c3cb46efdf0 --- /dev/null +++ b/langchain/chains/natbot/__init__.py @@ -0,0 +1,4 @@ +"""Implement a GPT-3 driven browser. + +Heavily influenced from https://github.com/nat/natbot +""" diff --git a/langchain/chains/natbot/base.py b/langchain/chains/natbot/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d688c6b7dce49e2af152b0c854b57053038961d9 --- /dev/null +++ b/langchain/chains/natbot/base.py @@ -0,0 +1,100 @@ +"""Implement an LLM driven browser.""" +from __future__ import annotations + +from typing import Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.natbot.prompt import PROMPT +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI + + +class NatBotChain(Chain, BaseModel): + """Implement an LLM driven browser. + + Example: + .. 
code-block:: python + + from langchain import NatBotChain, OpenAI + natbot = NatBotChain(llm=OpenAI(), objective="Buy me a new hat.") + """ + + llm: BaseLLM + """LLM wrapper to use.""" + objective: str + """Objective that NatBot is tasked with completing.""" + input_url_key: str = "url" #: :meta private: + input_browser_content_key: str = "browser_content" #: :meta private: + previous_command: str = "" #: :meta private: + output_key: str = "command" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @classmethod + def from_default(cls, objective: str) -> NatBotChain: + """Load with default LLM.""" + llm = OpenAI(temperature=0.5, best_of=10, n=3, max_tokens=50) + return cls(llm=llm, objective=objective) + + @property + def input_keys(self) -> List[str]: + """Expect url and browser content. + + :meta private: + """ + return [self.input_url_key, self.input_browser_content_key] + + @property + def output_keys(self) -> List[str]: + """Return command. + + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_executor = LLMChain(prompt=PROMPT, llm=self.llm) + url = inputs[self.input_url_key] + browser_content = inputs[self.input_browser_content_key] + llm_cmd = llm_executor.predict( + objective=self.objective, + url=url[:100], + previous_command=self.previous_command, + browser_content=browser_content[:4500], + ) + llm_cmd = llm_cmd.strip() + self.previous_command = llm_cmd + return {self.output_key: llm_cmd} + + def execute(self, url: str, browser_content: str) -> str: + """Figure out next browser command to run. + + Args: + url: URL of the site currently on. + browser_content: Content of the page as currently displayed by the browser. + + Returns: + Next browser command to run. + + Example: + .. code-block:: python + + browser_content = "...." + llm_command = natbot.run("www.google.com", browser_content) + """ + _inputs = { + self.input_url_key: url, + self.input_browser_content_key: browser_content, + } + return self(_inputs)[self.output_key] + + @property + def _chain_type(self) -> str: + return "nat_bot_chain" diff --git a/langchain/chains/natbot/crawler.py b/langchain/chains/natbot/crawler.py new file mode 100644 index 0000000000000000000000000000000000000000..7275ce7a2b40a47019a9d55dfc362dff154a9de3 --- /dev/null +++ b/langchain/chains/natbot/crawler.py @@ -0,0 +1,427 @@ +# flake8: noqa +import time +from sys import platform +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + TypedDict, + Union, +) + +if TYPE_CHECKING: + from playwright.sync_api import Browser, CDPSession, Page, sync_playwright + +black_listed_elements: Set[str] = { + "html", + "head", + "title", + "meta", + "iframe", + "body", + "script", + "style", + "path", + "svg", + "br", + "::marker", +} + + +class ElementInViewPort(TypedDict): + node_index: str + backend_node_id: int + node_name: Optional[str] + node_value: Optional[str] + node_meta: List[str] + is_clickable: bool + origin_x: int + origin_y: int + center_x: int + center_y: int + + +class Crawler: + def __init__(self) -> None: + try: + from playwright.sync_api import sync_playwright + except ImportError: + raise ValueError( + "Could not import playwright python package. " + "Please it install it with `pip install playwright`." 
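NatBotChain only decides the next browser command; driving the page is left to the Crawler defined below. A hedged sketch of the intended control loop, assuming playwright is installed, with command parsing deliberately simplified (TYPE/TYPESUBMIT handling is omitted):

.. code-block:: python

    from langchain.chains.natbot.base import NatBotChain
    from langchain.chains.natbot.crawler import Crawler

    natbot = NatBotChain.from_default(objective="Find the LangChain documentation")
    crawler = Crawler()
    crawler.go_to_page("google.com")

    for _ in range(5):  # a few iterations, purely to illustrate the loop
        browser_content = "\n".join(crawler.crawl())
        command = natbot.execute(crawler.page.url, browser_content)
        print(command)
        if command.startswith("SCROLL DOWN"):
            crawler.scroll("down")
        elif command.startswith("SCROLL UP"):
            crawler.scroll("up")
        elif command.startswith("CLICK"):
            crawler.click(command.split()[1])
        else:
            break  # TYPE / TYPESUBMIT left out of this sketch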
+ ) + self.browser: Browser = ( + sync_playwright().start().chromium.launch(headless=False) + ) + self.page: Page = self.browser.new_page() + self.page.set_viewport_size({"width": 1280, "height": 1080}) + self.page_element_buffer: Dict[int, ElementInViewPort] + self.client: CDPSession + + def go_to_page(self, url: str) -> None: + self.page.goto(url=url if "://" in url else "http://" + url) + self.client = self.page.context.new_cdp_session(self.page) + self.page_element_buffer = {} + + def scroll(self, direction: str) -> None: + if direction == "up": + self.page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;" + ) + elif direction == "down": + self.page.evaluate( + "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;" + ) + + def click(self, id: Union[str, int]) -> None: + # Inject javascript into the page which removes the target= attribute from all links + js = """ + links = document.getElementsByTagName("a"); + for (var i = 0; i < links.length; i++) { + links[i].removeAttribute("target"); + } + """ + self.page.evaluate(js) + + element = self.page_element_buffer.get(int(id)) + if element: + x: float = element["center_x"] + y: float = element["center_y"] + + self.page.mouse.click(x, y) + else: + print("Could not find element") + + def type(self, id: Union[str, int], text: str) -> None: + self.click(id) + self.page.keyboard.type(text) + + def enter(self) -> None: + self.page.keyboard.press("Enter") + + def crawl(self) -> List[str]: + page = self.page + page_element_buffer = self.page_element_buffer + start = time.time() + + page_state_as_text = [] + + device_pixel_ratio: float = page.evaluate("window.devicePixelRatio") + if platform == "darwin" and device_pixel_ratio == 1: # lies + device_pixel_ratio = 2 + + win_upper_bound: float = page.evaluate("window.pageYOffset") + win_left_bound: float = page.evaluate("window.pageXOffset") + win_width: float = page.evaluate("window.screen.width") + win_height: float = page.evaluate("window.screen.height") + win_right_bound: float = win_left_bound + win_width + win_lower_bound: float = win_upper_bound + win_height + + # percentage_progress_start = (win_upper_bound / document_scroll_height) * 100 + # percentage_progress_end = ( + # (win_height + win_upper_bound) / document_scroll_height + # ) * 100 + percentage_progress_start = 1 + percentage_progress_end = 2 + + page_state_as_text.append( + { + "x": 0, + "y": 0, + "text": "[scrollbar {:0.2f}-{:0.2f}%]".format( + round(percentage_progress_start, 2), round(percentage_progress_end) + ), + } + ) + + tree = self.client.send( + "DOMSnapshot.captureSnapshot", + {"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True}, + ) + strings: Dict[int, str] = tree["strings"] + document: Dict[str, Any] = tree["documents"][0] + nodes: Dict[str, Any] = document["nodes"] + backend_node_id: Dict[int, int] = nodes["backendNodeId"] + attributes: Dict[int, Dict[int, Any]] = nodes["attributes"] + node_value: Dict[int, int] = nodes["nodeValue"] + parent: Dict[int, int] = nodes["parentIndex"] + node_names: Dict[int, int] = nodes["nodeName"] + is_clickable: Set[int] = set(nodes["isClickable"]["index"]) + + input_value: Dict[str, Any] = nodes["inputValue"] + input_value_index: List[int] = input_value["index"] + input_value_values: List[int] = input_value["value"] + + layout: Dict[str, Any] = document["layout"] + layout_node_index: 
List[int] = layout["nodeIndex"] + bounds: Dict[int, List[float]] = layout["bounds"] + + cursor: int = 0 + + child_nodes: Dict[str, List[Dict[str, Any]]] = {} + elements_in_view_port: List[ElementInViewPort] = [] + + anchor_ancestry: Dict[str, Tuple[bool, Optional[int]]] = {"-1": (False, None)} + button_ancestry: Dict[str, Tuple[bool, Optional[int]]] = {"-1": (False, None)} + + def convert_name( + node_name: Optional[str], has_click_handler: Optional[bool] + ) -> str: + if node_name == "a": + return "link" + if node_name == "input": + return "input" + if node_name == "img": + return "img" + if ( + node_name == "button" or has_click_handler + ): # found pages that needed this quirk + return "button" + else: + return "text" + + def find_attributes( + attributes: Dict[int, Any], keys: List[str] + ) -> Dict[str, str]: + values = {} + + for [key_index, value_index] in zip(*(iter(attributes),) * 2): + if value_index < 0: + continue + key = strings[key_index] + value = strings[value_index] + + if key in keys: + values[key] = value + keys.remove(key) + + if not keys: + return values + + return values + + def add_to_hash_tree( + hash_tree: Dict[str, Tuple[bool, Optional[int]]], + tag: str, + node_id: int, + node_name: Optional[str], + parent_id: int, + ) -> Tuple[bool, Optional[int]]: + parent_id_str = str(parent_id) + if not parent_id_str in hash_tree: + parent_name = strings[node_names[parent_id]].lower() + grand_parent_id = parent[parent_id] + + add_to_hash_tree( + hash_tree, tag, parent_id, parent_name, grand_parent_id + ) + + is_parent_desc_anchor, anchor_id = hash_tree[parent_id_str] + + # even if the anchor is nested in another anchor, we set the "root" for all descendants to be ::Self + if node_name == tag: + value: Tuple[bool, Optional[int]] = (True, node_id) + elif ( + is_parent_desc_anchor + ): # reuse the parent's anchor_id (which could be much higher in the tree) + value = (True, anchor_id) + else: + value = ( + False, + None, + ) # not a descendant of an anchor, most likely it will become text, an interactive element or discarded + + hash_tree[str(node_id)] = value + + return value + + for index, node_name_index in enumerate(node_names): + node_parent = parent[index] + node_name: Optional[str] = strings[node_name_index].lower() + + is_ancestor_of_anchor, anchor_id = add_to_hash_tree( + anchor_ancestry, "a", index, node_name, node_parent + ) + + is_ancestor_of_button, button_id = add_to_hash_tree( + button_ancestry, "button", index, node_name, node_parent + ) + + try: + cursor = layout_node_index.index( + index + ) # todo replace this with proper cursoring, ignoring the fact this is O(n^2) for the moment + except: + continue + + if node_name in black_listed_elements: + continue + + [x, y, width, height] = bounds[cursor] + x /= device_pixel_ratio + y /= device_pixel_ratio + width /= device_pixel_ratio + height /= device_pixel_ratio + + elem_left_bound = x + elem_top_bound = y + elem_right_bound = x + width + elem_lower_bound = y + height + + partially_is_in_viewport = ( + elem_left_bound < win_right_bound + and elem_right_bound >= win_left_bound + and elem_top_bound < win_lower_bound + and elem_lower_bound >= win_upper_bound + ) + + if not partially_is_in_viewport: + continue + + meta_data: List[str] = [] + + # inefficient to grab the same set of keys for kinds of objects, but it's fine for now + element_attributes = find_attributes( + attributes[index], ["type", "placeholder", "aria-label", "title", "alt"] + ) + + ancestor_exception = is_ancestor_of_anchor or is_ancestor_of_button + 
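# Editorial note, not part of the original commit: `ancestor_exception` marks nodes nested
# inside an <a> or <button>. Their text and attributes are folded into that ancestor's entry
# in `child_nodes` below, so an entire link or button is later emitted as a single element
# instead of a pile of separate text fragments.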
ancestor_node_key = ( + None + if not ancestor_exception + else str(anchor_id) + if is_ancestor_of_anchor + else str(button_id) + ) + ancestor_node = ( + None + if not ancestor_exception + else child_nodes.setdefault(str(ancestor_node_key), []) + ) + + if node_name == "#text" and ancestor_exception and ancestor_node: + text = strings[node_value[index]] + if text == "|" or text == "•": + continue + ancestor_node.append({"type": "type", "value": text}) + else: + if ( + node_name == "input" and element_attributes.get("type") == "submit" + ) or node_name == "button": + node_name = "button" + element_attributes.pop( + "type", None + ) # prevent [button ... (button)..] + + for key in element_attributes: + if ancestor_exception and ancestor_node: + ancestor_node.append( + { + "type": "attribute", + "key": key, + "value": element_attributes[key], + } + ) + else: + meta_data.append(element_attributes[key]) + + element_node_value = None + + if node_value[index] >= 0: + element_node_value = strings[node_value[index]] + if ( + element_node_value == "|" + ): # commonly used as a separator, does not add much context - lets save ourselves some token space + continue + elif ( + node_name == "input" + and index in input_value_index + and element_node_value is None + ): + node_input_text_index = input_value_index.index(index) + text_index = input_value_values[node_input_text_index] + if node_input_text_index >= 0 and text_index >= 0: + element_node_value = strings[text_index] + + # remove redudant elements + if ancestor_exception and (node_name != "a" and node_name != "button"): + continue + + elements_in_view_port.append( + { + "node_index": str(index), + "backend_node_id": backend_node_id[index], + "node_name": node_name, + "node_value": element_node_value, + "node_meta": meta_data, + "is_clickable": index in is_clickable, + "origin_x": int(x), + "origin_y": int(y), + "center_x": int(x + (width / 2)), + "center_y": int(y + (height / 2)), + } + ) + + # lets filter further to remove anything that does not hold any text nor has click handlers + merge text from leaf#text nodes with the parent + elements_of_interest = [] + id_counter = 0 + + for element in elements_in_view_port: + node_index = element.get("node_index") + node_name = element.get("node_name") + element_node_value = element.get("node_value") + node_is_clickable = element.get("is_clickable") + node_meta_data: Optional[List[str]] = element.get("node_meta") + + inner_text = f"{element_node_value} " if element_node_value else "" + meta = "" + + if node_index in child_nodes: + for child in child_nodes[node_index]: + entry_type = child.get("type") + entry_value = child.get("value") + + if entry_type == "attribute" and node_meta_data: + entry_key = child.get("key") + node_meta_data.append(f'{entry_key}="{entry_value}"') + else: + inner_text += f"{entry_value} " + + if node_meta_data: + meta_string = " ".join(node_meta_data) + meta = f" {meta_string}" + + if inner_text != "": + inner_text = f"{inner_text.strip()}" + + converted_node_name = convert_name(node_name, node_is_clickable) + + # not very elegant, more like a placeholder + if ( + (converted_node_name != "button" or meta == "") + and converted_node_name != "link" + and converted_node_name != "input" + and converted_node_name != "img" + and converted_node_name != "textarea" + ) and inner_text.strip() == "": + continue + + page_element_buffer[id_counter] = element + + if inner_text != "": + elements_of_interest.append( + f"""<{converted_node_name} id={id_counter}{meta}>{inner_text}""" + ) + else: + 
elements_of_interest.append( + f"""<{converted_node_name} id={id_counter}{meta}/>""" + ) + id_counter += 1 + + print("Parsing time: {:0.2f} seconds".format(time.time() - start)) + return elements_of_interest diff --git a/langchain/chains/natbot/prompt.py b/langchain/chains/natbot/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3bbda35bab90e49454d2e9bb67b06149b5a81a83 --- /dev/null +++ b/langchain/chains/natbot/prompt.py @@ -0,0 +1,144 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_PROMPT_TEMPLATE = """ +You are an agents controlling a browser. You are given: + + (1) an objective that you are trying to achieve + (2) the URL of your current web page + (3) a simplified text description of what's visible in the browser window (more on that below) + +You can issue these commands: + SCROLL UP - scroll up one page + SCROLL DOWN - scroll down one page + CLICK X - click on a given element. You can only click on links, buttons, and inputs! + TYPE X "TEXT" - type the specified text into the input with id X + TYPESUBMIT X "TEXT" - same as TYPE above, except then it presses ENTER to submit the form + +The format of the browser content is highly simplified; all formatting elements are stripped. +Interactive elements such as links, inputs, buttons are represented like this: + + text + + text + +Images are rendered as their alt text like this: + + + +Based on your given objective, issue whatever command you believe will get you closest to achieving your goal. +You always start on Google; you should submit a search query to Google that will take you to the best page for +achieving your objective. And then interact with that page to achieve your objective. + +If you find yourself on Google and there are no search results displayed yet, you should probably issue a command +like "TYPESUBMIT 7 "search query"" to get to a more useful page. + +Then, if you find yourself on a Google search results page, you might issue the command "CLICK 24" to click +on the first link in the search results. (If your previous command was a TYPESUBMIT your next command should +probably be a CLICK.) + +Don't try to interact with elements that you can't see. + +Here are some examples: + +EXAMPLE 1: +================================================== +CURRENT BROWSER CONTENT: +------------------ +About +Store +Gmail +Images +(Google apps) +Sign in +(Google) + + + + +Advertising +Business +How Search works +Carbon neutral since 2007 +Privacy +Terms +Settings +------------------ +OBJECTIVE: Find a 2 bedroom house for sale in Anchorage AK for under $750k +CURRENT URL: https://www.google.com/ +YOUR COMMAND: +TYPESUBMIT 8 "anchorage redfin" +================================================== + +EXAMPLE 2: +================================================== +CURRENT BROWSER CONTENT: +------------------ +About +Store +Gmail +Images +(Google apps) +Sign in +(Google) + + + + +Advertising +Business +How Search works +Carbon neutral since 2007 +Privacy +Terms +Settings +------------------ +OBJECTIVE: Make a reservation for 4 at Dorsia at 8pm +CURRENT URL: https://www.google.com/ +YOUR COMMAND: +TYPESUBMIT 8 "dorsia nyc opentable" +================================================== + +EXAMPLE 3: +================================================== +CURRENT BROWSER CONTENT: +------------------ + + + + +OpenTable logo + +Find your table for any occasion + +Sep 28, 2022 +7:00 PM +2 people + + +It looks like you're in Peninsula. Not correct? 
+ + +------------------ +OBJECTIVE: Make a reservation for 4 for dinner at Dorsia in New York City at 8pm +CURRENT URL: https://www.opentable.com/ +YOUR COMMAND: +TYPESUBMIT 12 "dorsia new york city" +================================================== + +The current browser content, objective, and current URL follow. Reply with your next command to the browser. + +CURRENT BROWSER CONTENT: +------------------ +{browser_content} +------------------ + +OBJECTIVE: {objective} +CURRENT URL: {url} +PREVIOUS COMMAND: {previous_command} +YOUR COMMAND: +""" +PROMPT = PromptTemplate( + input_variables=["browser_content", "url", "previous_command", "objective"], + template=_PROMPT_TEMPLATE, +) diff --git a/langchain/chains/pal/__init__.py b/langchain/chains/pal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac79f404ae310248b507216a100b3202e30ffd90 --- /dev/null +++ b/langchain/chains/pal/__init__.py @@ -0,0 +1,4 @@ +"""Implements Program-Aided Language Models. + +As in https://arxiv.org/pdf/2211.10435.pdf. +""" diff --git a/langchain/chains/pal/__pycache__/__init__.cpython-39.pyc b/langchain/chains/pal/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e962e44ecd45f9a4ff7f703939058417d2f1b54d Binary files /dev/null and b/langchain/chains/pal/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/pal/__pycache__/base.cpython-39.pyc b/langchain/chains/pal/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40d61221925db88d59a6a3ae2582e6e844c7af49 Binary files /dev/null and b/langchain/chains/pal/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/pal/__pycache__/colored_object_prompt.cpython-39.pyc b/langchain/chains/pal/__pycache__/colored_object_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4bf1131d4654ac430612fe6da79eb667bb327a98 Binary files /dev/null and b/langchain/chains/pal/__pycache__/colored_object_prompt.cpython-39.pyc differ diff --git a/langchain/chains/pal/__pycache__/math_prompt.cpython-39.pyc b/langchain/chains/pal/__pycache__/math_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f9e319c400fa5ccc08f62e5d0fc05b6a0944e85 Binary files /dev/null and b/langchain/chains/pal/__pycache__/math_prompt.cpython-39.pyc differ diff --git a/langchain/chains/pal/base.py b/langchain/chains/pal/base.py new file mode 100644 index 0000000000000000000000000000000000000000..443dd137de456db894fbf8e4e6ad4b64f2763b34 --- /dev/null +++ b/langchain/chains/pal/base.py @@ -0,0 +1,96 @@ +"""Implements Program-Aided Language Models. + +As in https://arxiv.org/pdf/2211.10435.pdf. 
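A hedged sketch of using the PAL chain defined below through its math-prompt constructor; the model settings and question are illustrative, and because the chain executes model-written Python locally via PythonREPL it should only be run on trusted inputs:

.. code-block:: python

    from langchain.chains.pal.base import PALChain
    from langchain.llms import OpenAI

    llm = OpenAI(temperature=0, max_tokens=512)
    pal = PALChain.from_math_prompt(llm, verbose=True)
    question = (
        "Jan has three times the number of pets as Marcia. Marcia has two more pets than "
        "Cindy. If Cindy has four pets, how many total pets do the three have?"
    )
    print(pal.run(question))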
+""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.pal.colored_object_prompt import COLORED_OBJECT_PROMPT +from langchain.chains.pal.math_prompt import MATH_PROMPT +from langchain.prompts.base import BasePromptTemplate +from langchain.python import PythonREPL +from langchain.schema import BaseLanguageModel + + +class PALChain(Chain, BaseModel): + """Implements Program-Aided Language Models.""" + + llm: BaseLanguageModel + prompt: BasePromptTemplate + stop: str = "\n\n" + get_answer_expr: str = "print(solution())" + python_globals: Optional[Dict[str, Any]] = None + python_locals: Optional[Dict[str, Any]] = None + output_key: str = "result" #: :meta private: + return_intermediate_steps: bool = False + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return self.prompt.input_variables + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. + + :meta private: + """ + if not self.return_intermediate_steps: + return [self.output_key] + else: + return [self.output_key, "intermediate_steps"] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_chain = LLMChain(llm=self.llm, prompt=self.prompt) + code = llm_chain.predict(stop=[self.stop], **inputs) + self.callback_manager.on_text( + code, color="green", end="\n", verbose=self.verbose + ) + repl = PythonREPL(_globals=self.python_globals, _locals=self.python_locals) + res = repl.run(code + f"\n{self.get_answer_expr}") + output = {self.output_key: res.strip()} + if self.return_intermediate_steps: + output["intermediate_steps"] = code + return output + + @classmethod + def from_math_prompt(cls, llm: BaseLanguageModel, **kwargs: Any) -> PALChain: + """Load PAL from math prompt.""" + return cls( + llm=llm, + prompt=MATH_PROMPT, + stop="\n\n", + get_answer_expr="print(solution())", + **kwargs, + ) + + @classmethod + def from_colored_object_prompt( + cls, llm: BaseLanguageModel, **kwargs: Any + ) -> PALChain: + """Load PAL from colored object prompt.""" + return cls( + llm=llm, + prompt=COLORED_OBJECT_PROMPT, + stop="\n\n\n", + get_answer_expr="print(answer)", + **kwargs, + ) + + @property + def _chain_type(self) -> str: + return "pal_chain" diff --git a/langchain/chains/pal/colored_object_prompt.py b/langchain/chains/pal/colored_object_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..49a3e43f1805192d1c11689a4b58b458dd1e49be --- /dev/null +++ b/langchain/chains/pal/colored_object_prompt.py @@ -0,0 +1,77 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +template = ( + """ +# Generate Python3 Code to solve problems +# Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball? 
+# Put objects into a dictionary for quick look up +objects = dict() +objects['pencil'] = 'red' +objects['mug'] = 'purple' +objects['keychain'] = 'burgundy' +objects['teddy bear'] = 'fuchsia' +objects['plate'] = 'black' +objects['stress ball'] = 'blue' + +# Look up the color of stress ball +stress_ball_color = objects['stress ball'] +answer = stress_ball_color + + +# Q: On the table, you see a bunch of objects arranged in a row: a purple paperclip, a pink stress ball, a brown keychain, a green scrunchiephone charger, a mauve fidget spinner, and a burgundy pen. What is the color of the object directly to the right of the stress ball? +# Put objects into a list to record ordering +objects = [] +objects += [('paperclip', 'purple')] * 1 +objects += [('stress ball', 'pink')] * 1 +objects += [('keychain', 'brown')] * 1 +objects += [('scrunchiephone charger', 'green')] * 1 +objects += [('fidget spinner', 'mauve')] * 1 +objects += [('pen', 'burgundy')] * 1 + +# Find the index of the stress ball +stress_ball_idx = None +for i, object in enumerate(objects): + if object[0] == 'stress ball': + stress_ball_idx = i + break + +# Find the directly right object +direct_right = objects[i+1] + +# Check the directly right object's color +direct_right_color = direct_right[1] +answer = direct_right_color + + +# Q: On the nightstand, you see the following items arranged in a row: a teal plate, a burgundy keychain, a yellow scrunchiephone charger, an orange mug, a pink notebook, and a grey cup. How many non-orange items do you see to the left of the teal item? +# Put objects into a list to record ordering +objects = [] +objects += [('plate', 'teal')] * 1 +objects += [('keychain', 'burgundy')] * 1 +objects += [('scrunchiephone charger', 'yellow')] * 1 +objects += [('mug', 'orange')] * 1 +objects += [('notebook', 'pink')] * 1 +objects += [('cup', 'grey')] * 1 + +# Find the index of the teal item +teal_idx = None +for i, object in enumerate(objects): + if object[1] == 'teal': + teal_idx = i + break + +# Find non-orange items to the left of the teal item +non_orange = [object for object in objects[:i] if object[1] != 'orange'] + +# Count number of non-orange objects +num_non_orange = len(non_orange) +answer = num_non_orange + + +# Q: {question} +""".strip() + + "\n" +) + +COLORED_OBJECT_PROMPT = PromptTemplate(input_variables=["question"], template=template) diff --git a/langchain/chains/pal/math_prompt.py b/langchain/chains/pal/math_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..95e3537189be5706ab7fe22c8d5f12908d0ee676 --- /dev/null +++ b/langchain/chains/pal/math_prompt.py @@ -0,0 +1,157 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +template = ( + ''' +Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? + +# solution in Python: + + +def solution(): + """Olivia has $23. She bought five bagels for $3 each. How much money does she have left?""" + money_initial = 23 + bagels = 5 + bagel_cost = 3 + money_spent = bagels * bagel_cost + money_left = money_initial - money_spent + result = money_left + return result + + + + + +Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? + +# solution in Python: + + +def solution(): + """Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. 
How many golf balls did he have at the end of wednesday?""" + golf_balls_initial = 58 + golf_balls_lost_tuesday = 23 + golf_balls_lost_wednesday = 2 + golf_balls_left = golf_balls_initial - golf_balls_lost_tuesday - golf_balls_lost_wednesday + result = golf_balls_left + return result + + + + + +Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? + +# solution in Python: + + +def solution(): + """There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?""" + computers_initial = 9 + computers_per_day = 5 + num_days = 4 # 4 days between monday and thursday + computers_added = computers_per_day * num_days + computers_total = computers_initial + computers_added + result = computers_total + return result + + + + + +Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now? + +# solution in Python: + + +def solution(): + """Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?""" + toys_initial = 5 + mom_toys = 2 + dad_toys = 2 + total_received = mom_toys + dad_toys + total_toys = toys_initial + total_received + result = total_toys + return result + + + + + +Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? + +# solution in Python: + + +def solution(): + """Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?""" + jason_lollipops_initial = 20 + jason_lollipops_after = 12 + denny_lollipops = jason_lollipops_initial - jason_lollipops_after + result = denny_lollipops + return result + + + + + +Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? + +# solution in Python: + + +def solution(): + """Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?""" + leah_chocolates = 32 + sister_chocolates = 42 + total_chocolates = leah_chocolates + sister_chocolates + chocolates_eaten = 35 + chocolates_left = total_chocolates - chocolates_eaten + result = chocolates_left + return result + + + + + +Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? + +# solution in Python: + + +def solution(): + """If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?""" + cars_initial = 3 + cars_arrived = 2 + total_cars = cars_initial + cars_arrived + result = total_cars + return result + + + + + +Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? + +# solution in Python: + + +def solution(): + """There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. 
How many trees did the grove workers plant today?""" + trees_initial = 15 + trees_after = 21 + trees_added = trees_after - trees_initial + result = trees_added + return result + + + + + +Q: {question} + +# solution in Python: +'''.strip() + + "\n\n\n" +) +MATH_PROMPT = PromptTemplate(input_variables=["question"], template=template) diff --git a/langchain/chains/prompt_selector.py b/langchain/chains/prompt_selector.py new file mode 100644 index 0000000000000000000000000000000000000000..190907cc1490f3c3c5b4f7d6707d6cbdfc8b53a8 --- /dev/null +++ b/langchain/chains/prompt_selector.py @@ -0,0 +1,38 @@ +from abc import ABC, abstractmethod +from typing import Callable, List, Tuple + +from pydantic import BaseModel, Field + +from langchain.chat_models.base import BaseChatModel +from langchain.llms.base import BaseLLM +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class BasePromptSelector(BaseModel, ABC): + @abstractmethod + def get_prompt(self, llm: BaseLanguageModel) -> BasePromptTemplate: + """Get default prompt for a language model.""" + + +class ConditionalPromptSelector(BasePromptSelector): + """Prompt collection that goes through conditionals.""" + + default_prompt: BasePromptTemplate + conditionals: List[ + Tuple[Callable[[BaseLanguageModel], bool], BasePromptTemplate] + ] = Field(default_factory=list) + + def get_prompt(self, llm: BaseLanguageModel) -> BasePromptTemplate: + for condition, prompt in self.conditionals: + if condition(llm): + return prompt + return self.default_prompt + + +def is_llm(llm: BaseLanguageModel) -> bool: + return isinstance(llm, BaseLLM) + + +def is_chat_model(llm: BaseLanguageModel) -> bool: + return isinstance(llm, BaseChatModel) diff --git a/langchain/chains/qa_generation/__init__.py b/langchain/chains/qa_generation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/langchain/chains/qa_generation/__pycache__/__init__.cpython-39.pyc b/langchain/chains/qa_generation/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f05ced79edba9c6a11fc0f620e717a125edd64f7 Binary files /dev/null and b/langchain/chains/qa_generation/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/qa_generation/__pycache__/base.cpython-39.pyc b/langchain/chains/qa_generation/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b34726525f364e82971cfef0e555d767a9cf1c40 Binary files /dev/null and b/langchain/chains/qa_generation/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/qa_generation/__pycache__/prompt.cpython-39.pyc b/langchain/chains/qa_generation/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c59c2ee3d631f2a76a608a4e7185a80990b63fa0 Binary files /dev/null and b/langchain/chains/qa_generation/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/qa_generation/base.py b/langchain/chains/qa_generation/base.py new file mode 100644 index 0000000000000000000000000000000000000000..66907befaec23427c981794235fd45017dace10a --- /dev/null +++ b/langchain/chains/qa_generation/base.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional + +from pydantic import Field + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from 
langchain.chains.qa_generation.prompt import PROMPT_SELECTOR +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel +from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter + + +class QAGenerationChain(Chain): + llm_chain: LLMChain + text_splitter: TextSplitter = Field( + default=RecursiveCharacterTextSplitter(chunk_overlap=500) + ) + input_key: str = "text" + output_key: str = "questions" + k: Optional[int] = None + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + prompt: Optional[BasePromptTemplate] = None, + **kwargs: Any, + ) -> QAGenerationChain: + _prompt = prompt or PROMPT_SELECTOR.get_prompt(llm) + chain = LLMChain(llm=llm, prompt=_prompt) + return cls(llm_chain=chain, **kwargs) + + @property + def _chain_type(self) -> str: + raise NotImplementedError + + @property + def input_keys(self) -> List[str]: + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]: + docs = self.text_splitter.create_documents([inputs[self.input_key]]) + results = self.llm_chain.generate([{"text": d.page_content} for d in docs]) + qa = [json.loads(res[0].text) for res in results.generations] + return {self.output_key: qa} + + async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]: + raise NotImplementedError diff --git a/langchain/chains/qa_generation/prompt.py b/langchain/chains/qa_generation/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3919c2a23950c6443ce37c5b92f507bd27e163e4 --- /dev/null +++ b/langchain/chains/qa_generation/prompt.py @@ -0,0 +1,50 @@ +# flake8: noqa +from langchain.chains.prompt_selector import ConditionalPromptSelector, is_chat_model +from langchain.prompts.chat import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.prompts.prompt import PromptTemplate + +templ1 = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions. +Given a piece of text, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities. +When coming up with this question/answer pair, you must respond in the following format: +``` +{{ + "question": "$YOUR_QUESTION_HERE", + "answer": "$THE_ANSWER_HERE" +}} +``` + +Everything between the ``` must be valid json. +""" +templ2 = """Please come up with a question/answer pair, in the specified JSON format, for the following text: +---------------- +{text}""" +CHAT_PROMPT = ChatPromptTemplate.from_messages( + [ + SystemMessagePromptTemplate.from_template(templ1), + HumanMessagePromptTemplate.from_template(templ2), + ] +) +templ = """You are a smart assistant designed to help high school teachers come up with reading comprehension questions. +Given a piece of text, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities. +When coming up with this question/answer pair, you must respond in the following format: +``` +{{ + "question": "$YOUR_QUESTION_HERE", + "answer": "$THE_ANSWER_HERE" +}} +``` + +Everything between the ``` must be valid json. 
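A hedged sketch of driving the QAGenerationChain defined above; the sample text is illustrative, and the `json.loads` step in `_call` assumes the model actually returns the requested JSON format:

.. code-block:: python

    from langchain.chains.qa_generation.base import QAGenerationChain
    from langchain.chat_models import ChatOpenAI

    chain = QAGenerationChain.from_llm(ChatOpenAI(temperature=0))
    text = (
        "LangChain is a framework for developing applications powered by language models. "
        "It provides chains, prompts, and integrations with external data sources."
    )
    qa_pairs = chain.run(text)   # e.g. [{"question": "...", "answer": "..."}]
    print(qa_pairs)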
+ +Please come up with a question/answer pair, in the specified JSON format, for the following text: +---------------- +{text}""" +PROMPT = PromptTemplate.from_template(templ) + +PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=PROMPT, conditionals=[(is_chat_model, CHAT_PROMPT)] +) diff --git a/langchain/chains/qa_with_sources/__init__.py b/langchain/chains/qa_with_sources/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b1d18e832a33c4c4c40a81429dca889410a9bf19 --- /dev/null +++ b/langchain/chains/qa_with_sources/__init__.py @@ -0,0 +1,4 @@ +"""Load question answering with sources chains.""" +from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain + +__all__ = ["load_qa_with_sources_chain"] diff --git a/langchain/chains/qa_with_sources/__pycache__/__init__.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c0806437ae61edb62ee226a180f6d0ee748b448 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/base.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d9a046eadce704ddaf7358909d770e147e70fe1 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/loading.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/loading.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef849cab247628a11b75f08fe2ff123f57246940 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/loading.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/map_reduce_prompt.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/map_reduce_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e4de2432ad62c2973104f78e61113678a42e1d4 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/map_reduce_prompt.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/refine_prompts.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/refine_prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53b434cc6a8bb72ca8d9374bc299821cc3720cb3 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/refine_prompts.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/retrieval.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/retrieval.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ad431646b740b84e7e763bbb1c372c0cb62f170 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/retrieval.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/stuff_prompt.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/stuff_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00a7e53f3344ac6316eb44cc55f4632212406d60 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/stuff_prompt.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/__pycache__/vector_db.cpython-39.pyc b/langchain/chains/qa_with_sources/__pycache__/vector_db.cpython-39.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..db301116b772c927d4d4e2349b19b27cddc47735 Binary files /dev/null and b/langchain/chains/qa_with_sources/__pycache__/vector_db.cpython-39.pyc differ diff --git a/langchain/chains/qa_with_sources/base.py b/langchain/chains/qa_with_sources/base.py new file mode 100644 index 0000000000000000000000000000000000000000..628a7b360356d8480c90cd3ac4957a2f49618704 --- /dev/null +++ b/langchain/chains/qa_with_sources/base.py @@ -0,0 +1,150 @@ +"""Question answering with sources over documents.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.chains.base import Chain +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain +from langchain.chains.qa_with_sources.map_reduce_prompt import ( + COMBINE_PROMPT, + EXAMPLE_PROMPT, + QUESTION_PROMPT, +) +from langchain.docstore.document import Document +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class BaseQAWithSourcesChain(Chain, BaseModel, ABC): + """Question answering with sources over documents.""" + + combine_documents_chain: BaseCombineDocumentsChain + """Chain to use to combine documents.""" + question_key: str = "question" #: :meta private: + input_docs_key: str = "docs" #: :meta private: + answer_key: str = "answer" #: :meta private: + sources_answer_key: str = "sources" #: :meta private: + return_source_documents: bool = False + """Return the source documents.""" + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + document_prompt: BasePromptTemplate = EXAMPLE_PROMPT, + question_prompt: BasePromptTemplate = QUESTION_PROMPT, + combine_prompt: BasePromptTemplate = COMBINE_PROMPT, + **kwargs: Any, + ) -> BaseQAWithSourcesChain: + """Construct the chain from an LLM.""" + llm_question_chain = LLMChain(llm=llm, prompt=question_prompt) + llm_combine_chain = LLMChain(llm=llm, prompt=combine_prompt) + combine_results_chain = StuffDocumentsChain( + llm_chain=llm_combine_chain, + document_prompt=document_prompt, + document_variable_name="summaries", + ) + combine_document_chain = MapReduceDocumentsChain( + llm_chain=llm_question_chain, + combine_document_chain=combine_results_chain, + document_variable_name="context", + ) + return cls( + combine_documents_chain=combine_document_chain, + **kwargs, + ) + + @classmethod + def from_chain_type( + cls, + llm: BaseLanguageModel, + chain_type: str = "stuff", + chain_type_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> BaseQAWithSourcesChain: + """Load chain from chain type.""" + _chain_kwargs = chain_type_kwargs or {} + combine_document_chain = load_qa_with_sources_chain( + llm, chain_type=chain_type, **_chain_kwargs + ) + return cls(combine_documents_chain=combine_document_chain, **kwargs) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.question_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. 
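A hedged sketch of using the QAWithSourcesChain defined in this file over a couple of in-memory documents; the documents, sources, and question are illustrative:

.. code-block:: python

    from langchain.chains.qa_with_sources.base import QAWithSourcesChain
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI

    docs = [
        Document(page_content="The Eiffel Tower is 330 metres tall.",
                 metadata={"source": "wiki-eiffel"}),
        Document(page_content="The Shard in London is 310 metres tall.",
                 metadata={"source": "wiki-shard"}),
    ]
    chain = QAWithSourcesChain.from_llm(OpenAI(temperature=0))
    result = chain({"docs": docs, "question": "How tall is the Eiffel Tower?"})
    print(result["answer"], result["sources"])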
+ + :meta private: + """ + _output_keys = [self.answer_key, self.sources_answer_key] + if self.return_source_documents: + _output_keys = _output_keys + ["source_documents"] + return _output_keys + + @root_validator(pre=True) + def validate_naming(cls, values: Dict) -> Dict: + """Fix backwards compatability in naming.""" + if "combine_document_chain" in values: + values["combine_documents_chain"] = values.pop("combine_document_chain") + return values + + @abstractmethod + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + """Get docs to run questioning over.""" + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + docs = self._get_docs(inputs) + answer, _ = self.combine_documents_chain.combine_docs(docs, **inputs) + if "SOURCES: " in answer: + answer, sources = answer.split("SOURCES: ") + else: + sources = "" + result: Dict[str, Any] = { + self.answer_key: answer, + self.sources_answer_key: sources, + } + if self.return_source_documents: + result["source_documents"] = docs + return result + + +class QAWithSourcesChain(BaseQAWithSourcesChain, BaseModel): + """Question answering with sources over documents.""" + + input_docs_key: str = "docs" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_docs_key, self.question_key] + + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + return inputs.pop(self.input_docs_key) + + @property + def _chain_type(self) -> str: + return "qa_with_sources_chain" diff --git a/langchain/chains/qa_with_sources/loading.py b/langchain/chains/qa_with_sources/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..c1d923aefb4e36e0065dfb3b6aef7043f23475de --- /dev/null +++ b/langchain/chains/qa_with_sources/loading.py @@ -0,0 +1,171 @@ +"""Load question answering with sources chains.""" +from typing import Any, Mapping, Optional, Protocol + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain +from langchain.chains.combine_documents.refine import RefineDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.qa_with_sources import ( + map_reduce_prompt, + refine_prompts, + stuff_prompt, +) +from langchain.chains.question_answering import map_rerank_prompt +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class LoadingCallable(Protocol): + """Interface for loading the combine documents chain.""" + + def __call__( + self, llm: BaseLanguageModel, **kwargs: Any + ) -> BaseCombineDocumentsChain: + """Callable to load the combine documents chain.""" + + +def _load_map_rerank_chain( + llm: BaseLanguageModel, + prompt: BasePromptTemplate = map_rerank_prompt.PROMPT, + verbose: bool = False, + document_variable_name: str = "context", + rank_key: str = "score", + answer_key: str = "answer", + **kwargs: Any, +) -> MapRerankDocumentsChain: + llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose) + return MapRerankDocumentsChain( + llm_chain=llm_chain, + rank_key=rank_key, + answer_key=answer_key, + document_variable_name=document_variable_name, + **kwargs, + ) + + +def _load_stuff_chain( + llm: BaseLanguageModel, + prompt: BasePromptTemplate = stuff_prompt.PROMPT, + document_prompt: 
BasePromptTemplate = stuff_prompt.EXAMPLE_PROMPT, + document_variable_name: str = "summaries", + verbose: Optional[bool] = None, + **kwargs: Any, +) -> StuffDocumentsChain: + llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose) + return StuffDocumentsChain( + llm_chain=llm_chain, + document_variable_name=document_variable_name, + document_prompt=document_prompt, + verbose=verbose, + **kwargs, + ) + + +def _load_map_reduce_chain( + llm: BaseLanguageModel, + question_prompt: BasePromptTemplate = map_reduce_prompt.QUESTION_PROMPT, + combine_prompt: BasePromptTemplate = map_reduce_prompt.COMBINE_PROMPT, + document_prompt: BasePromptTemplate = map_reduce_prompt.EXAMPLE_PROMPT, + combine_document_variable_name: str = "summaries", + map_reduce_document_variable_name: str = "context", + collapse_prompt: Optional[BasePromptTemplate] = None, + reduce_llm: Optional[BaseLanguageModel] = None, + collapse_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + **kwargs: Any, +) -> MapReduceDocumentsChain: + map_chain = LLMChain(llm=llm, prompt=question_prompt, verbose=verbose) + _reduce_llm = reduce_llm or llm + reduce_chain = LLMChain(llm=_reduce_llm, prompt=combine_prompt, verbose=verbose) + combine_document_chain = StuffDocumentsChain( + llm_chain=reduce_chain, + document_variable_name=combine_document_variable_name, + document_prompt=document_prompt, + verbose=verbose, + ) + if collapse_prompt is None: + collapse_chain = None + if collapse_llm is not None: + raise ValueError( + "collapse_llm provided, but collapse_prompt was not: please " + "provide one or stop providing collapse_llm." + ) + else: + _collapse_llm = collapse_llm or llm + collapse_chain = StuffDocumentsChain( + llm_chain=LLMChain( + llm=_collapse_llm, + prompt=collapse_prompt, + verbose=verbose, + ), + document_variable_name=combine_document_variable_name, + document_prompt=document_prompt, + ) + return MapReduceDocumentsChain( + llm_chain=map_chain, + combine_document_chain=combine_document_chain, + document_variable_name=map_reduce_document_variable_name, + collapse_document_chain=collapse_chain, + verbose=verbose, + **kwargs, + ) + + +def _load_refine_chain( + llm: BaseLanguageModel, + question_prompt: BasePromptTemplate = refine_prompts.DEFAULT_TEXT_QA_PROMPT, + refine_prompt: BasePromptTemplate = refine_prompts.DEFAULT_REFINE_PROMPT, + document_prompt: BasePromptTemplate = refine_prompts.EXAMPLE_PROMPT, + document_variable_name: str = "context_str", + initial_response_name: str = "existing_answer", + refine_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + **kwargs: Any, +) -> RefineDocumentsChain: + initial_chain = LLMChain(llm=llm, prompt=question_prompt, verbose=verbose) + _refine_llm = refine_llm or llm + refine_chain = LLMChain(llm=_refine_llm, prompt=refine_prompt, verbose=verbose) + return RefineDocumentsChain( + initial_llm_chain=initial_chain, + refine_llm_chain=refine_chain, + document_variable_name=document_variable_name, + initial_response_name=initial_response_name, + document_prompt=document_prompt, + verbose=verbose, + **kwargs, + ) + + +def load_qa_with_sources_chain( + llm: BaseLanguageModel, + chain_type: str = "stuff", + verbose: Optional[bool] = None, + **kwargs: Any, +) -> BaseCombineDocumentsChain: + """Load question answering with sources chain. + + Args: + llm: Language Model to use in the chain. + chain_type: Type of document combining chain to use. Should be one of "stuff", + "map_reduce", and "refine". 
+ verbose: Whether chains should be run in verbose mode or not. Note that this + applies to all chains that make up the final chain. + + Returns: + A chain to use for question answering with sources. + """ + loader_mapping: Mapping[str, LoadingCallable] = { + "stuff": _load_stuff_chain, + "map_reduce": _load_map_reduce_chain, + "refine": _load_refine_chain, + "map_rerank": _load_map_rerank_chain, + } + if chain_type not in loader_mapping: + raise ValueError( + f"Got unsupported chain type: {chain_type}. " + f"Should be one of {loader_mapping.keys()}" + ) + _func: LoadingCallable = loader_mapping[chain_type] + return _func(llm, verbose=verbose, **kwargs) diff --git a/langchain/chains/qa_with_sources/map_reduce_prompt.py b/langchain/chains/qa_with_sources/map_reduce_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..8cafe7ecfbf4b270ecf3544aad3ecf789b0331c1 --- /dev/null +++ b/langchain/chains/qa_with_sources/map_reduce_prompt.py @@ -0,0 +1,55 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. +Return any relevant text verbatim. +{context} +Question: {question} +Relevant text, if any:""" +QUESTION_PROMPT = PromptTemplate( + template=question_prompt_template, input_variables=["context", "question"] +) + +combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). +If you don't know the answer, just say that you don't know. Don't try to make up an answer. +ALWAYS return a "SOURCES" part in your answer. + +QUESTION: Which state/country's law governs the interpretation of the contract? +========= +Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights. +Source: 28-pl +Content: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\n\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\n\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\n\n11.9 No Third-Party Beneficiaries. +Source: 30-pl +Content: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur, +Source: 4-pl +========= +FINAL ANSWER: This Agreement is governed by English law. +SOURCES: 28-pl + +QUESTION: What did the president say about Michael Jackson? +========= +Content: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. 
\n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. +Source: 0-pl +Content: And we won’t stop. \n\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \n\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \n\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \n\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n\nOfficer Mora was 27 years old. \n\nOfficer Rivera was 22. \n\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. +Source: 24-pl +Content: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \n\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \n\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n\nBut I want you to know that we are going to be okay. +Source: 5-pl +Content: More support for patients and families. \n\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n\nA unity agenda for the nation. \n\nWe can do this. \n\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. 
\n\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \n\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \n\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \n\nNow is the hour. \n\nOur moment of responsibility. \n\nOur test of resolve and conscience, of history itself. \n\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \n\nWell I know this nation. +Source: 34-pl +========= +FINAL ANSWER: The president did not mention Michael Jackson. +SOURCES: + +QUESTION: {question} +========= +{summaries} +========= +FINAL ANSWER:""" +COMBINE_PROMPT = PromptTemplate( + template=combine_prompt_template, input_variables=["summaries", "question"] +) + +EXAMPLE_PROMPT = PromptTemplate( + template="Content: {page_content}\nSource: {source}", + input_variables=["page_content", "source"], +) diff --git a/langchain/chains/qa_with_sources/refine_prompts.py b/langchain/chains/qa_with_sources/refine_prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..6920b6bba18a594425c112f1386397ded573488a --- /dev/null +++ b/langchain/chains/qa_with_sources/refine_prompts.py @@ -0,0 +1,38 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +DEFAULT_REFINE_PROMPT_TMPL = ( + "The original question is as follows: {question}\n" + "We have provided an existing answer, including sources: {existing_answer}\n" + "We have the opportunity to refine the existing answer" + "(only if needed) with some more context below.\n" + "------------\n" + "{context_str}\n" + "------------\n" + "Given the new context, refine the original answer to better " + "answer the question. " + "If you do update it, please update the sources as well. " + "If the context isn't useful, return the original answer." +) +DEFAULT_REFINE_PROMPT = PromptTemplate( + input_variables=["question", "existing_answer", "context_str"], + template=DEFAULT_REFINE_PROMPT_TMPL, +) + + +DEFAULT_TEXT_QA_PROMPT_TMPL = ( + "Context information is below. 
\n" + "---------------------\n" + "{context_str}" + "\n---------------------\n" + "Given the context information and not prior knowledge, " + "answer the question: {question}\n" +) +DEFAULT_TEXT_QA_PROMPT = PromptTemplate( + input_variables=["context_str", "question"], template=DEFAULT_TEXT_QA_PROMPT_TMPL +) + +EXAMPLE_PROMPT = PromptTemplate( + template="Content: {page_content}\nSource: {source}", + input_variables=["page_content", "source"], +) diff --git a/langchain/chains/qa_with_sources/retrieval.py b/langchain/chains/qa_with_sources/retrieval.py new file mode 100644 index 0000000000000000000000000000000000000000..1f50b28aba7a1912be43cc10385880b0719335f9 --- /dev/null +++ b/langchain/chains/qa_with_sources/retrieval.py @@ -0,0 +1,46 @@ +"""Question-answering with sources over an index.""" + +from typing import Any, Dict, List + +from pydantic import BaseModel, Field + +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain +from langchain.docstore.document import Document +from langchain.schema import BaseRetriever + + +class RetrievalQAWithSourcesChain(BaseQAWithSourcesChain, BaseModel): + """Question-answering with sources over an index.""" + + retriever: BaseRetriever = Field(exclude=True) + """Index to connect to.""" + reduce_k_below_max_tokens: bool = False + """Reduce the number of results to return from store based on tokens limit""" + max_tokens_limit: int = 3375 + """Restrict the docs to return from store based on tokens, + enforced only for StuffDocumentChain and if reduce_k_below_max_tokens is to true""" + + def _reduce_tokens_below_limit(self, docs: List[Document]) -> List[Document]: + num_docs = len(docs) + + if self.reduce_k_below_max_tokens and isinstance( + self.combine_documents_chain, StuffDocumentsChain + ): + tokens = [ + self.combine_documents_chain.llm_chain.llm.get_num_tokens( + doc.page_content + ) + for doc in docs + ] + token_count = sum(tokens[:num_docs]) + while token_count > self.max_tokens_limit: + num_docs -= 1 + token_count -= tokens[num_docs] + + return docs[:num_docs] + + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + question = inputs[self.question_key] + docs = self.retriever.get_relevant_texts(question) + return self._reduce_tokens_below_limit(docs) diff --git a/langchain/chains/qa_with_sources/stuff_prompt.py b/langchain/chains/qa_with_sources/stuff_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..b2112fa12b0f1a54a5623e6e7516ad8ebe938ba1 --- /dev/null +++ b/langchain/chains/qa_with_sources/stuff_prompt.py @@ -0,0 +1,44 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). +If you don't know the answer, just say that you don't know. Don't try to make up an answer. +ALWAYS return a "SOURCES" part in your answer. + +QUESTION: Which state/country's law governs the interpretation of the contract? +========= +Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights. +Source: 28-pl +Content: No Waiver. 
Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\n\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\n\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\n\n11.9 No Third-Party Beneficiaries. +Source: 30-pl +Content: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur, +Source: 4-pl +========= +FINAL ANSWER: This Agreement is governed by English law. +SOURCES: 28-pl + +QUESTION: What did the president say about Michael Jackson? +========= +Content: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. +Source: 0-pl +Content: And we won’t stop. \n\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \n\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \n\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \n\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n\nOfficer Mora was 27 years old. \n\nOfficer Rivera was 22. \n\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. +Source: 24-pl +Content: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \n\nTo all Americans, I will be honest with you, as I’ve always promised. 
A Russian dictator, invading a foreign country, has costs around the world. \n\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n\nBut I want you to know that we are going to be okay. +Source: 5-pl +Content: More support for patients and families. \n\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n\nA unity agenda for the nation. \n\nWe can do this. \n\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \n\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \n\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \n\nNow is the hour. \n\nOur moment of responsibility. \n\nOur test of resolve and conscience, of history itself. \n\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \n\nWell I know this nation. +Source: 34-pl +========= +FINAL ANSWER: The president did not mention Michael Jackson. 
+SOURCES: + +QUESTION: {question} +========= +{summaries} +========= +FINAL ANSWER:""" +PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"]) + +EXAMPLE_PROMPT = PromptTemplate( + template="Content: {page_content}\nSource: {source}", + input_variables=["page_content", "source"], +) diff --git a/langchain/chains/qa_with_sources/vector_db.py b/langchain/chains/qa_with_sources/vector_db.py new file mode 100644 index 0000000000000000000000000000000000000000..8a567dfaddb12b5774d32800fde55a4748b97cd8 --- /dev/null +++ b/langchain/chains/qa_with_sources/vector_db.py @@ -0,0 +1,56 @@ +"""Question-answering with sources over a vector database.""" + +from typing import Any, Dict, List + +from pydantic import BaseModel, Field + +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain +from langchain.docstore.document import Document +from langchain.vectorstores.base import VectorStore + + +class VectorDBQAWithSourcesChain(BaseQAWithSourcesChain, BaseModel): + """Question-answering with sources over a vector database.""" + + vectorstore: VectorStore = Field(exclude=True) + """Vector Database to connect to.""" + k: int = 4 + """Number of results to return from store""" + reduce_k_below_max_tokens: bool = False + """Reduce the number of results to return from store based on tokens limit""" + max_tokens_limit: int = 3375 + """Restrict the docs to return from store based on tokens, + enforced only for StuffDocumentChain and if reduce_k_below_max_tokens is to true""" + search_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Extra search args.""" + + def _reduce_tokens_below_limit(self, docs: List[Document]) -> List[Document]: + num_docs = len(docs) + + if self.reduce_k_below_max_tokens and isinstance( + self.combine_documents_chain, StuffDocumentsChain + ): + tokens = [ + self.combine_documents_chain.llm_chain.llm.get_num_tokens( + doc.page_content + ) + for doc in docs + ] + token_count = sum(tokens[:num_docs]) + while token_count > self.max_tokens_limit: + num_docs -= 1 + token_count -= tokens[num_docs] + + return docs[:num_docs] + + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + question = inputs[self.question_key] + docs = self.vectorstore.similarity_search( + question, k=self.k, **self.search_kwargs + ) + return self._reduce_tokens_below_limit(docs) + + @property + def _chain_type(self) -> str: + return "vector_db_qa_with_sources_chain" diff --git a/langchain/chains/question_answering/__init__.py b/langchain/chains/question_answering/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..522f7906088e2830a0130aa953c83e1f507f6348 --- /dev/null +++ b/langchain/chains/question_answering/__init__.py @@ -0,0 +1,220 @@ +"""Load question answering chains.""" +from typing import Any, Mapping, Optional, Protocol + +from langchain.callbacks.base import BaseCallbackManager +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain +from langchain.chains.combine_documents.refine import RefineDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.question_answering import ( + map_reduce_prompt, + map_rerank_prompt, + refine_prompts, + stuff_prompt, +) +from 
langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class LoadingCallable(Protocol): + """Interface for loading the combine documents chain.""" + + def __call__( + self, llm: BaseLanguageModel, **kwargs: Any + ) -> BaseCombineDocumentsChain: + """Callable to load the combine documents chain.""" + + +def _load_map_rerank_chain( + llm: BaseLanguageModel, + prompt: BasePromptTemplate = map_rerank_prompt.PROMPT, + verbose: bool = False, + document_variable_name: str = "context", + rank_key: str = "score", + answer_key: str = "answer", + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> MapRerankDocumentsChain: + llm_chain = LLMChain( + llm=llm, prompt=prompt, verbose=verbose, callback_manager=callback_manager + ) + return MapRerankDocumentsChain( + llm_chain=llm_chain, + rank_key=rank_key, + answer_key=answer_key, + document_variable_name=document_variable_name, + verbose=verbose, + callback_manager=callback_manager, + **kwargs, + ) + + +def _load_stuff_chain( + llm: BaseLanguageModel, + prompt: Optional[BasePromptTemplate] = None, + document_variable_name: str = "context", + verbose: Optional[bool] = None, + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> StuffDocumentsChain: + _prompt = prompt or stuff_prompt.PROMPT_SELECTOR.get_prompt(llm) + llm_chain = LLMChain( + llm=llm, prompt=_prompt, verbose=verbose, callback_manager=callback_manager + ) + # TODO: document prompt + return StuffDocumentsChain( + llm_chain=llm_chain, + document_variable_name=document_variable_name, + verbose=verbose, + callback_manager=callback_manager, + **kwargs, + ) + + +def _load_map_reduce_chain( + llm: BaseLanguageModel, + question_prompt: Optional[BasePromptTemplate] = None, + combine_prompt: Optional[BasePromptTemplate] = None, + combine_document_variable_name: str = "summaries", + map_reduce_document_variable_name: str = "context", + collapse_prompt: Optional[BasePromptTemplate] = None, + reduce_llm: Optional[BaseLanguageModel] = None, + collapse_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> MapReduceDocumentsChain: + _question_prompt = ( + question_prompt or map_reduce_prompt.QUESTION_PROMPT_SELECTOR.get_prompt(llm) + ) + _combine_prompt = ( + combine_prompt or map_reduce_prompt.COMBINE_PROMPT_SELECTOR.get_prompt(llm) + ) + map_chain = LLMChain( + llm=llm, + prompt=_question_prompt, + verbose=verbose, + callback_manager=callback_manager, + ) + _reduce_llm = reduce_llm or llm + reduce_chain = LLMChain( + llm=_reduce_llm, + prompt=_combine_prompt, + verbose=verbose, + callback_manager=callback_manager, + ) + # TODO: document prompt + combine_document_chain = StuffDocumentsChain( + llm_chain=reduce_chain, + document_variable_name=combine_document_variable_name, + verbose=verbose, + callback_manager=callback_manager, + ) + if collapse_prompt is None: + collapse_chain = None + if collapse_llm is not None: + raise ValueError( + "collapse_llm provided, but collapse_prompt was not: please " + "provide one or stop providing collapse_llm." 
+ ) + else: + _collapse_llm = collapse_llm or llm + collapse_chain = StuffDocumentsChain( + llm_chain=LLMChain( + llm=_collapse_llm, + prompt=collapse_prompt, + verbose=verbose, + callback_manager=callback_manager, + ), + document_variable_name=combine_document_variable_name, + verbose=verbose, + callback_manager=callback_manager, + ) + return MapReduceDocumentsChain( + llm_chain=map_chain, + combine_document_chain=combine_document_chain, + document_variable_name=map_reduce_document_variable_name, + collapse_document_chain=collapse_chain, + verbose=verbose, + callback_manager=callback_manager, + **kwargs, + ) + + +def _load_refine_chain( + llm: BaseLanguageModel, + question_prompt: Optional[BasePromptTemplate] = None, + refine_prompt: Optional[BasePromptTemplate] = None, + document_variable_name: str = "context_str", + initial_response_name: str = "existing_answer", + refine_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> RefineDocumentsChain: + _question_prompt = ( + question_prompt or refine_prompts.QUESTION_PROMPT_SELECTOR.get_prompt(llm) + ) + _refine_prompt = refine_prompt or refine_prompts.REFINE_PROMPT_SELECTOR.get_prompt( + llm + ) + initial_chain = LLMChain( + llm=llm, + prompt=_question_prompt, + verbose=verbose, + callback_manager=callback_manager, + ) + _refine_llm = refine_llm or llm + refine_chain = LLMChain( + llm=_refine_llm, + prompt=_refine_prompt, + verbose=verbose, + callback_manager=callback_manager, + ) + return RefineDocumentsChain( + initial_llm_chain=initial_chain, + refine_llm_chain=refine_chain, + document_variable_name=document_variable_name, + initial_response_name=initial_response_name, + verbose=verbose, + callback_manager=callback_manager, + **kwargs, + ) + + +def load_qa_chain( + llm: BaseLanguageModel, + chain_type: str = "stuff", + verbose: Optional[bool] = None, + callback_manager: Optional[BaseCallbackManager] = None, + **kwargs: Any, +) -> BaseCombineDocumentsChain: + """Load question answering chain. + + Args: + llm: Language Model to use in the chain. + chain_type: Type of document combining chain to use. Should be one of "stuff", + "map_reduce", and "refine". + verbose: Whether chains should be run in verbose mode or not. Note that this + applies to all chains that make up the final chain. + callback_manager: Callback manager to use for the chain. + + Returns: + A chain to use for question answering. + """ + loader_mapping: Mapping[str, LoadingCallable] = { + "stuff": _load_stuff_chain, + "map_reduce": _load_map_reduce_chain, + "refine": _load_refine_chain, + "map_rerank": _load_map_rerank_chain, + } + if chain_type not in loader_mapping: + raise ValueError( + f"Got unsupported chain type: {chain_type}. 
" + f"Should be one of {loader_mapping.keys()}" + ) + return loader_mapping[chain_type]( + llm, verbose=verbose, callback_manager=callback_manager, **kwargs + ) diff --git a/langchain/chains/question_answering/__pycache__/__init__.cpython-39.pyc b/langchain/chains/question_answering/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd9780e698cfe1fcf560a6dfc95d50bf53fc6bc4 Binary files /dev/null and b/langchain/chains/question_answering/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/question_answering/__pycache__/map_reduce_prompt.cpython-39.pyc b/langchain/chains/question_answering/__pycache__/map_reduce_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec341c28e44fe66aa85be73b795404ff47a8a8c5 Binary files /dev/null and b/langchain/chains/question_answering/__pycache__/map_reduce_prompt.cpython-39.pyc differ diff --git a/langchain/chains/question_answering/__pycache__/map_rerank_prompt.cpython-39.pyc b/langchain/chains/question_answering/__pycache__/map_rerank_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ceae57db194190c1e2114da5d574a26c4c6eb3c6 Binary files /dev/null and b/langchain/chains/question_answering/__pycache__/map_rerank_prompt.cpython-39.pyc differ diff --git a/langchain/chains/question_answering/__pycache__/refine_prompts.cpython-39.pyc b/langchain/chains/question_answering/__pycache__/refine_prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ab3f8c302f54ec8e1c4279911d940a82ed2f57a Binary files /dev/null and b/langchain/chains/question_answering/__pycache__/refine_prompts.cpython-39.pyc differ diff --git a/langchain/chains/question_answering/__pycache__/stuff_prompt.cpython-39.pyc b/langchain/chains/question_answering/__pycache__/stuff_prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..61c78721461a18cfbc02741d1f1960ae63f0f1ea Binary files /dev/null and b/langchain/chains/question_answering/__pycache__/stuff_prompt.cpython-39.pyc differ diff --git a/langchain/chains/question_answering/map_reduce_prompt.py b/langchain/chains/question_answering/map_reduce_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..7c0efd777773fd7683e34c183550568755bf8a2a --- /dev/null +++ b/langchain/chains/question_answering/map_reduce_prompt.py @@ -0,0 +1,83 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate +from langchain.prompts.chat import ( + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, + ChatPromptTemplate, +) +from langchain.chains.prompt_selector import ( + ConditionalPromptSelector, + is_chat_model, +) + +question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. +Return any relevant text verbatim. +{context} +Question: {question} +Relevant text, if any:""" +QUESTION_PROMPT = PromptTemplate( + template=question_prompt_template, input_variables=["context", "question"] +) +system_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. +Return any relevant text verbatim. 
+______________________ +{context}""" +messages = [ + SystemMessagePromptTemplate.from_template(system_template), + HumanMessagePromptTemplate.from_template("{question}"), +] +CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages) + + +QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=QUESTION_PROMPT, conditionals=[(is_chat_model, CHAT_QUESTION_PROMPT)] +) + +combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer. +If you don't know the answer, just say that you don't know. Don't try to make up an answer. + +QUESTION: Which state/country's law governs the interpretation of the contract? +========= +Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights. + +Content: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\n\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\n\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\n\n11.9 No Third-Party Beneficiaries. + +Content: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur, +========= +FINAL ANSWER: This Agreement is governed by English law. + +QUESTION: What did the president say about Michael Jackson? +========= +Content: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. + +Content: And we won’t stop. \n\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \n\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \n\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \n\nWe can’t change how divided we’ve been. 
But we can change how we move forward—on COVID-19 and other issues we must face together. \n\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n\nOfficer Mora was 27 years old. \n\nOfficer Rivera was 22. \n\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. + +Content: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \n\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \n\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n\nBut I want you to know that we are going to be okay. + +Content: More support for patients and families. \n\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n\nA unity agenda for the nation. \n\nWe can do this. \n\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \n\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \n\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \n\nNow is the hour. \n\nOur moment of responsibility. \n\nOur test of resolve and conscience, of history itself. \n\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \n\nWell I know this nation. +========= +FINAL ANSWER: The president did not mention Michael Jackson. + +QUESTION: {question} +========= +{summaries} +========= +FINAL ANSWER:""" +COMBINE_PROMPT = PromptTemplate( + template=combine_prompt_template, input_variables=["summaries", "question"] +) + +system_template = """Given the following extracted parts of a long document and a question, create a final answer. +If you don't know the answer, just say that you don't know. Don't try to make up an answer. 
+______________________ +{summaries}""" +messages = [ + SystemMessagePromptTemplate.from_template(system_template), + HumanMessagePromptTemplate.from_template("{question}"), +] +CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages) + + +COMBINE_PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=COMBINE_PROMPT, conditionals=[(is_chat_model, CHAT_COMBINE_PROMPT)] +) diff --git a/langchain/chains/question_answering/map_rerank_prompt.py b/langchain/chains/question_answering/map_rerank_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..0fd945c4bdfd29b197d1e0382e26d03faccfa4f8 --- /dev/null +++ b/langchain/chains/question_answering/map_rerank_prompt.py @@ -0,0 +1,66 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate +from langchain.output_parsers.regex import RegexParser + +output_parser = RegexParser( + regex=r"(.*?)\nScore: (.*)", + output_keys=["answer", "score"], +) + +prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + +In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format: + +Question: [question here] +Helpful Answer: [answer here] +Score: [score between 0 and 100] + +How to determine the score: +- Higher is a better answer +- Better responds fully to the asked question, with sufficient level of detail +- If you do not know the answer based on the context, that should be a score of 0 +- Don't be overconfident! + +Example #1 + +Context: +--------- +Apples are red +--------- +Question: what color are apples? +Helpful Answer: red +Score: 100 + +Example #2 + +Context: +--------- +it was night and the witness forgot his glasses. he was not sure if it was a sports car or an suv +--------- +Question: what type was the car? +Helpful Answer: a sports car or an suv +Score: 60 + +Example #3 + +Context: +--------- +Pears are either red or orange +--------- +Question: what color are apples? +Helpful Answer: This document does not answer the question +Score: 0 + +Begin! + +Context: +--------- +{context} +--------- +Question: {question} +Helpful Answer:""" +PROMPT = PromptTemplate( + template=prompt_template, + input_variables=["context", "question"], + output_parser=output_parser, +) diff --git a/langchain/chains/question_answering/refine_prompts.py b/langchain/chains/question_answering/refine_prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..78c6dd77d14ddfc2a36733c32d60ba3236621cbb --- /dev/null +++ b/langchain/chains/question_answering/refine_prompts.py @@ -0,0 +1,80 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate +from langchain.prompts.chat import ( + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, + ChatPromptTemplate, + AIMessagePromptTemplate, +) +from langchain.chains.prompt_selector import ( + ConditionalPromptSelector, + is_chat_model, +) + + +DEFAULT_REFINE_PROMPT_TMPL = ( + "The original question is as follows: {question}\n" + "We have provided an existing answer: {existing_answer}\n" + "We have the opportunity to refine the existing answer" + "(only if needed) with some more context below.\n" + "------------\n" + "{context_str}\n" + "------------\n" + "Given the new context, refine the original answer to better " + "answer the question. " + "If the context isn't useful, return the original answer." 
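A small sketch of what the map-rerank output parser above produces: RegexParser.parse applies the regex and maps the capture groups onto the output keys, and MapRerankDocumentsChain then ranks the per-document answers by the "score" field.

from langchain.output_parsers.regex import RegexParser

parser = RegexParser(regex=r"(.*?)\nScore: (.*)", output_keys=["answer", "score"])
parsed = parser.parse("red\nScore: 100")
# parsed == {"answer": "red", "score": "100"}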
+) +DEFAULT_REFINE_PROMPT = PromptTemplate( + input_variables=["question", "existing_answer", "context_str"], + template=DEFAULT_REFINE_PROMPT_TMPL, +) +refine_template = ( + "We have the opportunity to refine the existing answer" + "(only if needed) with some more context below.\n" + "------------\n" + "{context_str}\n" + "------------\n" + "Given the new context, refine the original answer to better " + "answer the question. " + "If the context isn't useful, return the original answer." +) +messages = [ + HumanMessagePromptTemplate.from_template("{question}"), + AIMessagePromptTemplate.from_template("{existing_answer}"), + HumanMessagePromptTemplate.from_template(refine_template), +] +CHAT_REFINE_PROMPT = ChatPromptTemplate.from_messages(messages) +REFINE_PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=DEFAULT_REFINE_PROMPT, + conditionals=[(is_chat_model, CHAT_REFINE_PROMPT)], +) + + +DEFAULT_TEXT_QA_PROMPT_TMPL = ( + "Context information is below. \n" + "---------------------\n" + "{context_str}" + "\n---------------------\n" + "Given the context information and not prior knowledge, " + "answer the question: {question}\n" +) +DEFAULT_TEXT_QA_PROMPT = PromptTemplate( + input_variables=["context_str", "question"], template=DEFAULT_TEXT_QA_PROMPT_TMPL +) +chat_qa_prompt_template = ( + "Context information is below. \n" + "---------------------\n" + "{context_str}" + "\n---------------------\n" + "Given the context information and not prior knowledge, " + "answer any questions" +) +messages = [ + SystemMessagePromptTemplate.from_template(chat_qa_prompt_template), + HumanMessagePromptTemplate.from_template("{question}"), +] +CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages) +QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=DEFAULT_TEXT_QA_PROMPT, + conditionals=[(is_chat_model, CHAT_QUESTION_PROMPT)], +) diff --git a/langchain/chains/question_answering/stuff_prompt.py b/langchain/chains/question_answering/stuff_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..968d2950b691d3f38afcf7c689c97bd971bf2a05 --- /dev/null +++ b/langchain/chains/question_answering/stuff_prompt.py @@ -0,0 +1,37 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate +from langchain.chains.prompt_selector import ( + ConditionalPromptSelector, + is_chat_model, +) +from langchain.prompts.chat import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) + + +prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + +{context} + +Question: {question} +Helpful Answer:""" +PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +) + +system_template = """Use the following pieces of context to answer the users question. +If you don't know the answer, just say that you don't know, don't try to make up an answer. 
+---------------- +{context}""" +messages = [ + SystemMessagePromptTemplate.from_template(system_template), + HumanMessagePromptTemplate.from_template("{question}"), +] +CHAT_PROMPT = ChatPromptTemplate.from_messages(messages) + + +PROMPT_SELECTOR = ConditionalPromptSelector( + default_prompt=PROMPT, conditionals=[(is_chat_model, CHAT_PROMPT)] +) diff --git a/langchain/chains/retrieval_qa/__init__.py b/langchain/chains/retrieval_qa/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b8e4d9aa0b2012d66851b2ef64073efd2b807a70 --- /dev/null +++ b/langchain/chains/retrieval_qa/__init__.py @@ -0,0 +1 @@ +"""Chain for question-answering against a vector database.""" diff --git a/langchain/chains/retrieval_qa/__pycache__/__init__.cpython-39.pyc b/langchain/chains/retrieval_qa/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09822fb1799da522405088e6107407c06ae82f9d Binary files /dev/null and b/langchain/chains/retrieval_qa/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/retrieval_qa/__pycache__/base.cpython-39.pyc b/langchain/chains/retrieval_qa/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dd7624c8565b01fa93eb7ff46d797b7d1c3bcfd Binary files /dev/null and b/langchain/chains/retrieval_qa/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/retrieval_qa/base.py b/langchain/chains/retrieval_qa/base.py new file mode 100644 index 0000000000000000000000000000000000000000..c1543da8eba61b8d5ea29404f3ed2af95f6521d6 --- /dev/null +++ b/langchain/chains/retrieval_qa/base.py @@ -0,0 +1,174 @@ +"""Chain for question-answering against a vector database.""" +from __future__ import annotations + +from abc import abstractmethod +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains.base import Chain +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.question_answering import load_qa_chain +from langchain.chains.question_answering.stuff_prompt import PROMPT_SELECTOR +from langchain.prompts import PromptTemplate +from langchain.schema import BaseLanguageModel, BaseRetriever, Document +from langchain.vectorstores.base import VectorStore + + +class BaseRetrievalQA(Chain, BaseModel): + combine_documents_chain: BaseCombineDocumentsChain + """Chain to use to combine the documents.""" + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + return_source_documents: bool = False + """Return the source documents.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + allow_population_by_field_name = True + + @property + def input_keys(self) -> List[str]: + """Return the input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the output keys. 
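A brief sketch of how the ConditionalPromptSelector above resolves a prompt at load time; it assumes a ChatOpenAI wrapper is available under langchain.chat_models in this snapshot, which is not shown in this diff.

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering.stuff_prompt import PROMPT_SELECTOR

PROMPT_SELECTOR.get_prompt(OpenAI())      # completion model -> PROMPT (a PromptTemplate)
PROMPT_SELECTOR.get_prompt(ChatOpenAI())  # chat model -> CHAT_PROMPT (a ChatPromptTemplate), via is_chat_model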
+ + :meta private: + """ + _output_keys = [self.output_key] + if self.return_source_documents: + _output_keys = _output_keys + ["source_documents"] + return _output_keys + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + prompt: Optional[PromptTemplate] = None, + **kwargs: Any, + ) -> BaseRetrievalQA: + """Initialize from LLM.""" + _prompt = prompt or PROMPT_SELECTOR.get_prompt(llm) + llm_chain = LLMChain(llm=llm, prompt=_prompt) + document_prompt = PromptTemplate( + input_variables=["page_content"], template="Context:\n{page_content}" + ) + combine_documents_chain = StuffDocumentsChain( + llm_chain=llm_chain, + document_variable_name="context", + document_prompt=document_prompt, + ) + + return cls(combine_documents_chain=combine_documents_chain, **kwargs) + + @classmethod + def from_chain_type( + cls, + llm: BaseLanguageModel, + chain_type: str = "stuff", + chain_type_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> BaseRetrievalQA: + """Load chain from chain type.""" + _chain_type_kwargs = chain_type_kwargs or {} + combine_documents_chain = load_qa_chain( + llm, chain_type=chain_type, **_chain_type_kwargs + ) + return cls(combine_documents_chain=combine_documents_chain, **kwargs) + + @abstractmethod + def _get_docs(self, question: str) -> List[Document]: + """Get documents to do question answering over.""" + + def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]: + """Run get_relevant_text and llm on input query. + + If chain has 'return_source_documents' as 'True', returns + the retrieved documents as well under the key 'source_documents'. + + Example: + .. code-block:: python + + res = indexqa({'query': 'This is my query'}) + answer, docs = res['result'], res['source_documents'] + """ + question = inputs[self.input_key] + + docs = self._get_docs(question) + answer, _ = self.combine_documents_chain.combine_docs(docs, question=question) + + if self.return_source_documents: + return {self.output_key: answer, "source_documents": docs} + else: + return {self.output_key: answer} + + +class RetrievalQA(BaseRetrievalQA, BaseModel): + """Chain for question-answering against an index. + + Example: + .. code-block:: python + + from langchain.llms import OpenAI + from langchain.chains import RetrievalQA + from langchain.faiss import FAISS + vectordb = FAISS(...) + retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=vectordb) + + """ + + retriever: BaseRetriever = Field(exclude=True) + + def _get_docs(self, question: str) -> List[Document]: + return self.retriever.get_relevant_texts(question) + + +class VectorDBQA(BaseRetrievalQA, BaseModel): + """Chain for question-answering against a vector database.""" + + vectorstore: VectorStore = Field(exclude=True, alias="vectorstore") + """Vector Database to connect to.""" + k: int = 4 + """Number of documents to query for.""" + search_type: str = "similarity" + """Search type to use over vectorstore. 
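A hedged usage sketch for the VectorDBQA chain defined here, assuming the FAISS and OpenAIEmbeddings wrappers elsewhere in this package behave as in upstream LangChain; the example text is illustrative.

from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.retrieval_qa.base import VectorDBQA

vectordb = FAISS.from_texts(
    ["This Agreement is governed by English law."],
    OpenAIEmbeddings(),
)
qa = VectorDBQA.from_chain_type(
    OpenAI(temperature=0),
    chain_type="stuff",
    vectorstore=vectordb,
    k=4,
    return_source_documents=True,
)
res = qa({"query": "Which state/country's law governs the contract?"})
# res["result"] is the answer; res["source_documents"] lists the retrieved Documents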
`similarity` or `mmr`.""" + search_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Extra search args.""" + + @root_validator() + def validate_search_type(cls, values: Dict) -> Dict: + """Validate search type.""" + if "search_type" in values: + search_type = values["search_type"] + if search_type not in ("similarity", "mmr"): + raise ValueError(f"search_type of {search_type} not allowed.") + return values + + def _get_docs(self, question: str) -> List[Document]: + if self.search_type == "similarity": + docs = self.vectorstore.similarity_search( + question, k=self.k, **self.search_kwargs + ) + elif self.search_type == "mmr": + docs = self.vectorstore.max_marginal_relevance_search( + question, k=self.k, **self.search_kwargs + ) + else: + raise ValueError(f"search_type of {self.search_type} not allowed.") + return docs + + @property + def _chain_type(self) -> str: + """Return the chain type.""" + return "vector_db_qa" diff --git a/langchain/chains/retrieval_qa/prompt.py b/langchain/chains/retrieval_qa/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..9ebb89eac923b64854dcb03cd768a64f5a74bcaa --- /dev/null +++ b/langchain/chains/retrieval_qa/prompt.py @@ -0,0 +1,12 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + +{context} + +Question: {question} +Helpful Answer:""" +PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +) diff --git a/langchain/chains/sequential.py b/langchain/chains/sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5d66be5a15d592b883a626752ef88eea669f59 --- /dev/null +++ b/langchain/chains/sequential.py @@ -0,0 +1,153 @@ +"""Chain pipeline where the outputs of one step feed directly into next.""" +from typing import Dict, List + +from pydantic import BaseModel, Extra, root_validator + +from langchain.chains.base import Chain +from langchain.input import get_color_mapping + + +class SequentialChain(Chain, BaseModel): + """Chain where the outputs of one chain feed directly into next.""" + + chains: List[Chain] + input_variables: List[str] + output_variables: List[str] #: :meta private: + return_all: bool = False + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return expected input keys to the chain. + + :meta private: + """ + return self.input_variables + + @property + def output_keys(self) -> List[str]: + """Return output key. + + :meta private: + """ + return self.output_variables + + @root_validator(pre=True) + def validate_chains(cls, values: Dict) -> Dict: + """Validate that the correct inputs exist for all chains.""" + chains = values["chains"] + input_variables = values["input_variables"] + memory_keys = list() + if "memory" in values and values["memory"] is not None: + """Validate that prompt input variables are consistent.""" + memory_keys = values["memory"].memory_variables + if any(input_variables) in memory_keys: + overlapping_keys = input_variables & memory_keys + raise ValueError( + f"The the input key(s) {''.join(overlapping_keys)} are found " + f"in the Memory keys ({memory_keys}) - please use input and " + f"memory keys that don't overlap." 
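`VectorDBQA` is the older variant that talks to a `VectorStore` directly instead of going through a retriever. A short sketch of the `search_type` and `k` knobs, again assuming the upstream `FAISS` and `OpenAIEmbeddings` wrappers and an API key:

.. code-block:: python

    from langchain.chains.retrieval_qa.base import VectorDBQA
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.llms import OpenAI
    from langchain.vectorstores import FAISS

    vectordb = FAISS.from_texts(
        ["MMR trades relevance for diversity.", "Similarity search ranks by distance only."],
        OpenAIEmbeddings(),
    )
    qa = VectorDBQA.from_chain_type(
        llm=OpenAI(temperature=0),
        vectorstore=vectordb,
        search_type="mmr",   # validate_search_type allows "similarity" or "mmr"
        k=2,                 # number of documents fetched from the store
    )
    print(qa.run("How does MMR differ from plain similarity search?"))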
+ ) + + known_variables = set(input_variables + memory_keys) + + for chain in chains: + missing_vars = set(chain.input_keys).difference(known_variables) + if missing_vars: + raise ValueError( + f"Missing required input keys: {missing_vars}, " + f"only had {known_variables}" + ) + overlapping_keys = known_variables.intersection(chain.output_keys) + if overlapping_keys: + raise ValueError( + f"Chain returned keys that already exist: {overlapping_keys}" + ) + + known_variables |= set(chain.output_keys) + + if "output_variables" not in values: + if values.get("return_all", False): + output_keys = known_variables.difference(input_variables) + else: + output_keys = chains[-1].output_keys + values["output_variables"] = output_keys + else: + missing_vars = set(values["output_variables"]).difference(known_variables) + if missing_vars: + raise ValueError( + f"Expected output variables that were not found: {missing_vars}." + ) + + return values + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + known_values = inputs.copy() + for i, chain in enumerate(self.chains): + outputs = chain(known_values, return_only_outputs=True) + known_values.update(outputs) + return {k: known_values[k] for k in self.output_variables} + + +class SimpleSequentialChain(Chain, BaseModel): + """Simple chain where the outputs of one step feed directly into next.""" + + chains: List[Chain] + strip_outputs: bool = False + input_key: str = "input" #: :meta private: + output_key: str = "output" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. + + :meta private: + """ + return [self.output_key] + + @root_validator() + def validate_chains(cls, values: Dict) -> Dict: + """Validate that chains are all single input/output.""" + for chain in values["chains"]: + if len(chain.input_keys) != 1: + raise ValueError( + "Chains used in SimplePipeline should all have one input, got " + f"{chain} with {len(chain.input_keys)} inputs." + ) + if len(chain.output_keys) != 1: + raise ValueError( + "Chains used in SimplePipeline should all have one output, got " + f"{chain} with {len(chain.output_keys)} outputs." 
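A sketch of `SequentialChain` wiring two `LLMChain`s together by named variables, so the validator above can check that every chain's inputs are produced upstream (assumes an OpenAI key):

.. code-block:: python

    from langchain.chains.llm import LLMChain
    from langchain.chains.sequential import SequentialChain
    from langchain.llms import OpenAI
    from langchain.prompts import PromptTemplate

    llm = OpenAI(temperature=0.7)
    synopsis_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            input_variables=["title"],
            template="Write a one-paragraph synopsis for a play titled {title}.",
        ),
        output_key="synopsis",
    )
    review_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            input_variables=["synopsis"],
            template="Write a two-sentence review of this synopsis:\n{synopsis}",
        ),
        output_key="review",
    )
    overall = SequentialChain(
        chains=[synopsis_chain, review_chain],
        input_variables=["title"],
        output_variables=["synopsis", "review"],  # checked against the chains' output keys
    )
    print(overall({"title": "Tragedy at Sunset"}))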
+ ) + return values + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + _input = inputs[self.input_key] + color_mapping = get_color_mapping([str(i) for i in range(len(self.chains))]) + for i, chain in enumerate(self.chains): + _input = chain.run(_input) + if self.strip_outputs: + _input = _input.strip() + self.callback_manager.on_text( + _input, color=color_mapping[str(i)], end="\n", verbose=self.verbose + ) + return {self.output_key: _input} diff --git a/langchain/chains/sql_database/__init__.py b/langchain/chains/sql_database/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b704f72c280d2732f484c3ef389c1e3126746a14 --- /dev/null +++ b/langchain/chains/sql_database/__init__.py @@ -0,0 +1 @@ +"""Chain for interacting with SQL Database.""" diff --git a/langchain/chains/sql_database/__pycache__/__init__.cpython-39.pyc b/langchain/chains/sql_database/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f952ffe4acc5df1ca937fc2dd583a8bf86972743 Binary files /dev/null and b/langchain/chains/sql_database/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/sql_database/__pycache__/base.cpython-39.pyc b/langchain/chains/sql_database/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94ba8ecf12d61ad9d2ed17e728bc061588b1fb64 Binary files /dev/null and b/langchain/chains/sql_database/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chains/sql_database/__pycache__/prompt.cpython-39.pyc b/langchain/chains/sql_database/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..557cc26ef4922ff52571b5d36007d4c1d36773ec Binary files /dev/null and b/langchain/chains/sql_database/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/chains/sql_database/base.py b/langchain/chains/sql_database/base.py new file mode 100644 index 0000000000000000000000000000000000000000..6f95901456fc01c10511abbb6a210d4550888f25 --- /dev/null +++ b/langchain/chains/sql_database/base.py @@ -0,0 +1,186 @@ +"""Chain for interacting with SQL Database.""" +from __future__ import annotations + +from typing import Any, Dict, List + +from pydantic import BaseModel, Extra, Field + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.sql_database.prompt import DECIDER_PROMPT, PROMPT +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel +from langchain.sql_database import SQLDatabase + + +class SQLDatabaseChain(Chain, BaseModel): + """Chain for interacting with SQL Database. + + Example: + .. code-block:: python + + from langchain import SQLDatabaseChain, OpenAI, SQLDatabase + db = SQLDatabase(...) 
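`SimpleSequentialChain` is the single-input/single-output special case enforced by the validator above; a quick sketch (assumes an OpenAI key):

.. code-block:: python

    from langchain.chains.llm import LLMChain
    from langchain.chains.sequential import SimpleSequentialChain
    from langchain.llms import OpenAI
    from langchain.prompts import PromptTemplate

    llm = OpenAI(temperature=0)
    name_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            input_variables=["product"],
            template="Suggest one company name for a maker of {product}.",
        ),
    )
    slogan_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate(
            input_variables=["name"],
            template="Write a slogan for the company {name}.",
        ),
    )
    chain = SimpleSequentialChain(
        chains=[name_chain, slogan_chain],
        strip_outputs=True,   # trim whitespace between steps
        verbose=True,         # print each intermediate output in colour
    )
    print(chain.run("colorful socks"))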
+ db_chain = SQLDatabaseChain(llm=OpenAI(), database=db) + """ + + llm: BaseLanguageModel + """LLM wrapper to use.""" + database: SQLDatabase = Field(exclude=True) + """SQL Database to connect to.""" + prompt: BasePromptTemplate = PROMPT + """Prompt to use to translate natural language to SQL.""" + top_k: int = 5 + """Number of results to return from the query""" + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + return_intermediate_steps: bool = False + """Whether or not to return the intermediate steps along with the final answer.""" + return_direct: bool = False + """Whether or not to return the result of querying the SQL table directly.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. + + :meta private: + """ + if not self.return_intermediate_steps: + return [self.output_key] + else: + return [self.output_key, "intermediate_steps"] + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + llm_chain = LLMChain(llm=self.llm, prompt=self.prompt) + input_text = f"{inputs[self.input_key]} \nSQLQuery:" + self.callback_manager.on_text(input_text, verbose=self.verbose) + # If not present, then defaults to None which is all tables. + table_names_to_use = inputs.get("table_names_to_use") + table_info = self.database.get_table_info(table_names=table_names_to_use) + llm_inputs = { + "input": input_text, + "top_k": self.top_k, + "dialect": self.database.dialect, + "table_info": table_info, + "stop": ["\nSQLResult:"], + } + intermediate_steps = [] + sql_cmd = llm_chain.predict(**llm_inputs) + intermediate_steps.append(sql_cmd) + self.callback_manager.on_text(sql_cmd, color="green", verbose=self.verbose) + result = self.database.run(sql_cmd) + intermediate_steps.append(result) + self.callback_manager.on_text("\nSQLResult: ", verbose=self.verbose) + self.callback_manager.on_text(result, color="yellow", verbose=self.verbose) + # If return direct, we just set the final result equal to the sql query + if self.return_direct: + final_result = result + else: + self.callback_manager.on_text("\nAnswer:", verbose=self.verbose) + input_text += f"{sql_cmd}\nSQLResult: {result}\nAnswer:" + llm_inputs["input"] = input_text + final_result = llm_chain.predict(**llm_inputs) + self.callback_manager.on_text( + final_result, color="green", verbose=self.verbose + ) + chain_result: Dict[str, Any] = {self.output_key: final_result} + if self.return_intermediate_steps: + chain_result["intermediate_steps"] = intermediate_steps + return chain_result + + @property + def _chain_type(self) -> str: + return "sql_database_chain" + + +class SQLDatabaseSequentialChain(Chain, BaseModel): + """Chain for querying SQL database that is a sequential chain. + + The chain is as follows: + 1. Based on the query, determine which tables to use. + 2. Based on those tables, call the normal SQL database chain. + + This is useful in cases where the number of tables in the database is large. 
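A sketch of `SQLDatabaseChain` with intermediate steps enabled; the database URI and file are hypothetical, and `SQLDatabase.from_uri` is assumed from the upstream `langchain.sql_database` module:

.. code-block:: python

    from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

    db = SQLDatabase.from_uri("sqlite:///chinook.db")   # hypothetical local SQLite file
    db_chain = SQLDatabaseChain(
        llm=OpenAI(temperature=0),
        database=db,
        verbose=True,
        return_intermediate_steps=True,   # surface the generated SQL and raw result too
    )
    out = db_chain({"query": "How many employees are there?"})
    print(out["result"])
    print(out["intermediate_steps"])      # [generated SQL, raw SQLResult]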
+ """ + + return_intermediate_steps: bool = False + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + database: SQLDatabase, + query_prompt: BasePromptTemplate = PROMPT, + decider_prompt: BasePromptTemplate = DECIDER_PROMPT, + **kwargs: Any, + ) -> SQLDatabaseSequentialChain: + """Load the necessary chains.""" + sql_chain = SQLDatabaseChain( + llm=llm, database=database, prompt=query_prompt, **kwargs + ) + decider_chain = LLMChain( + llm=llm, prompt=decider_prompt, output_key="table_names" + ) + return cls(sql_chain=sql_chain, decider_chain=decider_chain, **kwargs) + + decider_chain: LLMChain + sql_chain: SQLDatabaseChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. + + :meta private: + """ + if not self.return_intermediate_steps: + return [self.output_key] + else: + return [self.output_key, "intermediate_steps"] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + _table_names = self.sql_chain.database.get_table_names() + table_names = ", ".join(_table_names) + llm_inputs = { + "query": inputs[self.input_key], + "table_names": table_names, + } + table_names_to_use = self.decider_chain.predict_and_parse(**llm_inputs) + self.callback_manager.on_text( + "Table names to use:", end="\n", verbose=self.verbose + ) + self.callback_manager.on_text( + str(table_names_to_use), color="yellow", verbose=self.verbose + ) + new_inputs = { + self.sql_chain.input_key: inputs[self.input_key], + "table_names_to_use": table_names_to_use, + } + return self.sql_chain(new_inputs, return_only_outputs=True) + + @property + def _chain_type(self) -> str: + return "sql_database_sequential_chain" diff --git a/langchain/chains/sql_database/prompt.py b/langchain/chains/sql_database/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..730c5a2374334abe309ba543c11d4388951f3af0 --- /dev/null +++ b/langchain/chains/sql_database/prompt.py @@ -0,0 +1,40 @@ +# flake8: noqa +from langchain.output_parsers.list import CommaSeparatedListOutputParser +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database. + +Never query for all the columns from a specific table, only ask for a the few relevant columns given the question. + +Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table. + +Use the following format: + +Question: "Question here" +SQLQuery: "SQL Query to run" +SQLResult: "Result of the SQLQuery" +Answer: "Final answer here" + +Only use the tables listed below. 
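A sketch of the two-step sequential variant, where the decider chain first narrows down the table list before the SQL chain runs; the database is again hypothetical:

.. code-block:: python

    from langchain import OpenAI, SQLDatabase
    from langchain.chains.sql_database.base import SQLDatabaseSequentialChain

    db = SQLDatabase.from_uri("sqlite:///chinook.db")   # hypothetical database with many tables
    chain = SQLDatabaseSequentialChain.from_llm(OpenAI(temperature=0), db, verbose=True)
    # Step 1: the decider chain picks the relevant table names.
    # Step 2: SQLDatabaseChain runs with only those tables' schema in the prompt.
    print(chain({"query": "List the five most expensive tracks."}))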
+ +{table_info} + +Question: {input}""" + +PROMPT = PromptTemplate( + input_variables=["input", "table_info", "dialect", "top_k"], + template=_DEFAULT_TEMPLATE, +) + +_DECIDER_TEMPLATE = """Given the below input question and list of potential tables, output a comma separated list of the table names that may be necessary to answer this question. + +Question: {query} + +Table Names: {table_names} + +Relevant Table Names:""" +DECIDER_PROMPT = PromptTemplate( + input_variables=["query", "table_names"], + template=_DECIDER_TEMPLATE, + output_parser=CommaSeparatedListOutputParser(), +) diff --git a/langchain/chains/summarize/__init__.py b/langchain/chains/summarize/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c31fda479f9d11770526af76d79414cb9aacdd98 --- /dev/null +++ b/langchain/chains/summarize/__init__.py @@ -0,0 +1,139 @@ +"""Load summarizing chains.""" +from typing import Any, Mapping, Optional, Protocol + +from langchain.chains.combine_documents.base import BaseCombineDocumentsChain +from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain +from langchain.chains.combine_documents.refine import RefineDocumentsChain +from langchain.chains.combine_documents.stuff import StuffDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.summarize import map_reduce_prompt, refine_prompts, stuff_prompt +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel + + +class LoadingCallable(Protocol): + """Interface for loading the combine documents chain.""" + + def __call__( + self, llm: BaseLanguageModel, **kwargs: Any + ) -> BaseCombineDocumentsChain: + """Callable to load the combine documents chain.""" + + +def _load_stuff_chain( + llm: BaseLanguageModel, + prompt: BasePromptTemplate = stuff_prompt.PROMPT, + document_variable_name: str = "text", + verbose: Optional[bool] = None, + **kwargs: Any, +) -> StuffDocumentsChain: + llm_chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose) + # TODO: document prompt + return StuffDocumentsChain( + llm_chain=llm_chain, + document_variable_name=document_variable_name, + verbose=verbose, + **kwargs, + ) + + +def _load_map_reduce_chain( + llm: BaseLanguageModel, + map_prompt: BasePromptTemplate = map_reduce_prompt.PROMPT, + combine_prompt: BasePromptTemplate = map_reduce_prompt.PROMPT, + combine_document_variable_name: str = "text", + map_reduce_document_variable_name: str = "text", + collapse_prompt: Optional[BasePromptTemplate] = None, + reduce_llm: Optional[BaseLanguageModel] = None, + collapse_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + **kwargs: Any, +) -> MapReduceDocumentsChain: + map_chain = LLMChain(llm=llm, prompt=map_prompt, verbose=verbose) + _reduce_llm = reduce_llm or llm + reduce_chain = LLMChain(llm=_reduce_llm, prompt=combine_prompt, verbose=verbose) + # TODO: document prompt + combine_document_chain = StuffDocumentsChain( + llm_chain=reduce_chain, + document_variable_name=combine_document_variable_name, + verbose=verbose, + ) + if collapse_prompt is None: + collapse_chain = None + if collapse_llm is not None: + raise ValueError( + "collapse_llm provided, but collapse_prompt was not: please " + "provide one or stop providing collapse_llm." 
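Because `DECIDER_PROMPT` carries a `CommaSeparatedListOutputParser`, `predict_and_parse` returns a Python list of table names rather than raw text; a small sketch (assumes an OpenAI key):

.. code-block:: python

    from langchain.chains.llm import LLMChain
    from langchain.chains.sql_database.prompt import DECIDER_PROMPT
    from langchain.llms import OpenAI

    decider = LLMChain(
        llm=OpenAI(temperature=0), prompt=DECIDER_PROMPT, output_key="table_names"
    )
    tables = decider.predict_and_parse(
        query="Which artist has the most tracks?",
        table_names="artists, albums, tracks, invoices",
    )
    print(tables)   # e.g. ["artists", "tracks"] after comma-splitting the completion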
+ ) + else: + _collapse_llm = collapse_llm or llm + collapse_chain = StuffDocumentsChain( + llm_chain=LLMChain( + llm=_collapse_llm, + prompt=collapse_prompt, + verbose=verbose, + ), + document_variable_name=combine_document_variable_name, + ) + return MapReduceDocumentsChain( + llm_chain=map_chain, + combine_document_chain=combine_document_chain, + document_variable_name=map_reduce_document_variable_name, + collapse_document_chain=collapse_chain, + verbose=verbose, + **kwargs, + ) + + +def _load_refine_chain( + llm: BaseLanguageModel, + question_prompt: BasePromptTemplate = refine_prompts.PROMPT, + refine_prompt: BasePromptTemplate = refine_prompts.REFINE_PROMPT, + document_variable_name: str = "text", + initial_response_name: str = "existing_answer", + refine_llm: Optional[BaseLanguageModel] = None, + verbose: Optional[bool] = None, + **kwargs: Any, +) -> RefineDocumentsChain: + initial_chain = LLMChain(llm=llm, prompt=question_prompt, verbose=verbose) + _refine_llm = refine_llm or llm + refine_chain = LLMChain(llm=_refine_llm, prompt=refine_prompt, verbose=verbose) + return RefineDocumentsChain( + initial_llm_chain=initial_chain, + refine_llm_chain=refine_chain, + document_variable_name=document_variable_name, + initial_response_name=initial_response_name, + verbose=verbose, + **kwargs, + ) + + +def load_summarize_chain( + llm: BaseLanguageModel, + chain_type: str = "stuff", + verbose: Optional[bool] = None, + **kwargs: Any, +) -> BaseCombineDocumentsChain: + """Load summarizing chain. + + Args: + llm: Language Model to use in the chain. + chain_type: Type of document combining chain to use. Should be one of "stuff", + "map_reduce", and "refine". + verbose: Whether chains should be run in verbose mode or not. Note that this + applies to all chains that make up the final chain. + + Returns: + A chain to use for summarizing. + """ + loader_mapping: Mapping[str, LoadingCallable] = { + "stuff": _load_stuff_chain, + "map_reduce": _load_map_reduce_chain, + "refine": _load_refine_chain, + } + if chain_type not in loader_mapping: + raise ValueError( + f"Got unsupported chain type: {chain_type}. 
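A minimal sketch of `load_summarize_chain` over a couple of documents; the text is placeholder content and an OpenAI key is assumed:

.. code-block:: python

    from langchain.chains.summarize import load_summarize_chain
    from langchain.docstore.document import Document
    from langchain.llms import OpenAI

    docs = [
        Document(page_content="First chunk of a long article about LangChain..."),
        Document(page_content="Second chunk of the same article..."),
    ]
    # "stuff" concatenates everything into one prompt; "map_reduce" summarizes each
    # document and then combines the partial summaries; "refine" folds documents in
    # one at a time against the running summary.
    chain = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce")
    print(chain.run(docs))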
" + f"Should be one of {loader_mapping.keys()}" + ) + return loader_mapping[chain_type](llm, verbose=verbose, **kwargs) diff --git a/langchain/chains/summarize/map_reduce_prompt.py b/langchain/chains/summarize/map_reduce_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd9f941f432dfd04c1dd28d7a88a3c558ed5750 --- /dev/null +++ b/langchain/chains/summarize/map_reduce_prompt.py @@ -0,0 +1,11 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +prompt_template = """Write a concise summary of the following: + + +"{text}" + + +CONCISE SUMMARY:""" +PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"]) diff --git a/langchain/chains/summarize/refine_prompts.py b/langchain/chains/summarize/refine_prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..fc59d9e23dfd21625cf8e2e202188c1916cd9e73 --- /dev/null +++ b/langchain/chains/summarize/refine_prompts.py @@ -0,0 +1,28 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +REFINE_PROMPT_TMPL = ( + "Your job is to produce a final summary\n" + "We have provided an existing summary up to a certain point: {existing_answer}\n" + "We have the opportunity to refine the existing summary" + "(only if needed) with some more context below.\n" + "------------\n" + "{text}\n" + "------------\n" + "Given the new context, refine the original summary" + "If the context isn't useful, return the original summary." +) +REFINE_PROMPT = PromptTemplate( + input_variables=["existing_answer", "text"], + template=REFINE_PROMPT_TMPL, +) + + +prompt_template = """Write a concise summary of the following: + + +"{text}" + + +CONCISE SUMMARY:""" +PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"]) diff --git a/langchain/chains/summarize/stuff_prompt.py b/langchain/chains/summarize/stuff_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd9f941f432dfd04c1dd28d7a88a3c558ed5750 --- /dev/null +++ b/langchain/chains/summarize/stuff_prompt.py @@ -0,0 +1,11 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +prompt_template = """Write a concise summary of the following: + + +"{text}" + + +CONCISE SUMMARY:""" +PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"]) diff --git a/langchain/chains/transform.py b/langchain/chains/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..f363567163e217bd94f5c959b6f2881637d67c9c --- /dev/null +++ b/langchain/chains/transform.py @@ -0,0 +1,41 @@ +"""Chain that runs an arbitrary python function.""" +from typing import Callable, Dict, List + +from pydantic import BaseModel + +from langchain.chains.base import Chain + + +class TransformChain(Chain, BaseModel): + """Chain transform chain output. + + Example: + .. code-block:: python + + from langchain import TransformChain + transform_chain = TransformChain(input_variables=["text"], + output_variables["entities"], transform=func()) + """ + + input_variables: List[str] + output_variables: List[str] + transform: Callable[[Dict[str, str]], Dict[str, str]] + + @property + def input_keys(self) -> List[str]: + """Expect input keys. + + :meta private: + """ + return self.input_variables + + @property + def output_keys(self) -> List[str]: + """Return output keys. 
+ + :meta private: + """ + return self.output_variables + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + return self.transform(inputs) diff --git a/langchain/chains/vector_db_qa/__pycache__/__init__.cpython-39.pyc b/langchain/chains/vector_db_qa/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..997bd211ffde0616ecae2c156f935499e0f61985 Binary files /dev/null and b/langchain/chains/vector_db_qa/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chains/vector_db_qa/__pycache__/base.cpython-39.pyc b/langchain/chains/vector_db_qa/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29c20b4c7a952da26902a0f9066183a4e112b659 Binary files /dev/null and b/langchain/chains/vector_db_qa/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chat_models/__init__.py b/langchain/chat_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..88bafc7e7a28a087eb51af00cf3588846e2ce988 --- /dev/null +++ b/langchain/chat_models/__init__.py @@ -0,0 +1,5 @@ +from langchain.chat_models.azure_openai import AzureChatOpenAI +from langchain.chat_models.openai import ChatOpenAI +from langchain.chat_models.promptlayer_openai import PromptLayerChatOpenAI + +__all__ = ["ChatOpenAI", "AzureChatOpenAI", "PromptLayerChatOpenAI"] diff --git a/langchain/chat_models/__pycache__/__init__.cpython-39.pyc b/langchain/chat_models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e10f65c00447bb0e5f8c7c0f9d73efc9dd6a528b Binary files /dev/null and b/langchain/chat_models/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/chat_models/__pycache__/azure_openai.cpython-39.pyc b/langchain/chat_models/__pycache__/azure_openai.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00cd6ef10357da7719ba82efd85ca34e66bd2242 Binary files /dev/null and b/langchain/chat_models/__pycache__/azure_openai.cpython-39.pyc differ diff --git a/langchain/chat_models/__pycache__/base.cpython-39.pyc b/langchain/chat_models/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca68bff3ea7ed1e28138572173ddf4b884d3e145 Binary files /dev/null and b/langchain/chat_models/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/chat_models/__pycache__/openai.cpython-39.pyc b/langchain/chat_models/__pycache__/openai.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..525ea5804ba506e9fd14403617e79050507ea1ff Binary files /dev/null and b/langchain/chat_models/__pycache__/openai.cpython-39.pyc differ diff --git a/langchain/chat_models/__pycache__/promptlayer_openai.cpython-39.pyc b/langchain/chat_models/__pycache__/promptlayer_openai.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0664e301af0d622d8b29aca2bd140797e10602c Binary files /dev/null and b/langchain/chat_models/__pycache__/promptlayer_openai.cpython-39.pyc differ diff --git a/langchain/chat_models/azure_openai.py b/langchain/chat_models/azure_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..37f00d5017e63302bd8e2f2a7fd6def237c64caf --- /dev/null +++ b/langchain/chat_models/azure_openai.py @@ -0,0 +1,105 @@ +"""Azure OpenAI chat wrapper.""" +from __future__ import annotations + +import logging +from typing import Any, Dict + +from pydantic import root_validator + +from langchain.chat_models.openai 
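The `TransformChain` docstring example above is only schematic; `transform` takes the function object itself (not a call), and `output_variables` is a keyword argument. A runnable sketch with no LLM involved:

.. code-block:: python

    from langchain import TransformChain

    def extract_first_line(inputs: dict) -> dict:
        """Pure-Python step that pulls the first line out of the input text."""
        return {"first_line": inputs["text"].split("\n")[0]}

    transform_chain = TransformChain(
        input_variables=["text"],
        output_variables=["first_line"],
        transform=extract_first_line,
    )
    print(transform_chain({"text": "line one\nline two"}))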
import ( + ChatOpenAI, +) +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__file__) + + +class AzureChatOpenAI(ChatOpenAI): + """Wrapper around Azure OpenAI Chat Completion API. To use this class you + must have a deployed model on Azure OpenAI. Use `deployment_name` in the + constructor to refer to the "Model deployment name" in the Azure portal. + + In addition, you should have the ``openai`` python package installed, and the + following environment variables set or passed in constructor in lower case: + - ``OPENAI_API_TYPE`` (default: ``azure``) + - ``OPENAI_API_KEY`` + - ``OPENAI_API_BASE`` + - ``OPENAI_API_VERSION`` + + For exmaple, if you have `gpt-35-turbo` deployed, with the deployment name + `35-turbo-dev`, the constructor should look like: + + .. code-block:: python + AzureChatOpenAI( + deployment_name="35-turbo-dev", + openai_api_version="2023-03-15-preview", + ) + + Be aware the API version may change. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + """ + + deployment_name: str = "" + openai_api_type: str = "azure" + openai_api_base: str = "" + openai_api_version: str = "" + openai_api_key: str = "" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, + "openai_api_key", + "OPENAI_API_KEY", + ) + openai_api_base = get_from_dict_or_env( + values, + "openai_api_base", + "OPENAI_API_BASE", + ) + openai_api_version = get_from_dict_or_env( + values, + "openai_api_version", + "OPENAI_API_VERSION", + ) + openai_api_type = get_from_dict_or_env( + values, + "openai_api_type", + "OPENAI_API_TYPE", + ) + try: + import openai + + openai.api_type = openai_api_type + openai.api_base = openai_api_base + openai.api_version = openai_api_version + openai.api_key = openai_api_key + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + try: + values["client"] = openai.ChatCompletion + except AttributeError: + raise ValueError( + "`openai` has no `ChatCompletion` attribute, this is likely " + "due to an old version of the openai package. Try upgrading it " + "with `pip install --upgrade openai`." 
+ ) + if values["n"] < 1: + raise ValueError("n must be at least 1.") + if values["n"] > 1 and values["streaming"]: + raise ValueError("n must be 1 when streaming.") + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + return { + **super()._default_params, + "engine": self.deployment_name, + } diff --git a/langchain/chat_models/base.py b/langchain/chat_models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf9d4fab66a65cd802344c325e2cd7ebf72bc55 --- /dev/null +++ b/langchain/chat_models/base.py @@ -0,0 +1,145 @@ +from abc import ABC, abstractmethod +from typing import List, Optional + +from pydantic import BaseModel, Extra, Field, validator + +import langchain +from langchain.callbacks import get_callback_manager +from langchain.callbacks.base import BaseCallbackManager +from langchain.schema import ( + AIMessage, + BaseLanguageModel, + BaseMessage, + ChatGeneration, + ChatResult, + HumanMessage, + LLMResult, + PromptValue, +) + + +def _get_verbosity() -> bool: + return langchain.verbose + + +class BaseChatModel(BaseLanguageModel, BaseModel, ABC): + verbose: bool = Field(default_factory=_get_verbosity) + """Whether to print out response text.""" + callback_manager: BaseCallbackManager = Field(default_factory=get_callback_manager) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @validator("callback_manager", pre=True, always=True) + def set_callback_manager( + cls, callback_manager: Optional[BaseCallbackManager] + ) -> BaseCallbackManager: + """If callback manager is None, set it. + + This allows users to pass in None as callback manager, which is a nice UX. 
+ """ + return callback_manager or get_callback_manager() + + def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict: + return {} + + def generate( + self, messages: List[List[BaseMessage]], stop: Optional[List[str]] = None + ) -> LLMResult: + """Top Level call""" + results = [self._generate(m, stop=stop) for m in messages] + llm_output = self._combine_llm_outputs([res.llm_output for res in results]) + generations = [res.generations for res in results] + return LLMResult(generations=generations, llm_output=llm_output) + + async def agenerate( + self, messages: List[List[BaseMessage]], stop: Optional[List[str]] = None + ) -> LLMResult: + """Top Level call""" + results = [await self._agenerate(m, stop=stop) for m in messages] + llm_output = self._combine_llm_outputs([res.llm_output for res in results]) + generations = [res.generations for res in results] + return LLMResult(generations=generations, llm_output=llm_output) + + def generate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + prompt_messages = [p.to_messages() for p in prompts] + prompt_strings = [p.to_string() for p in prompts] + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose + ) + try: + output = self.generate(prompt_messages, stop=stop) + except (KeyboardInterrupt, Exception) as e: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + self.callback_manager.on_llm_end(output, verbose=self.verbose) + return output + + async def agenerate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + prompt_messages = [p.to_messages() for p in prompts] + prompt_strings = [p.to_string() for p in prompts] + if self.callback_manager.is_async: + await self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose + ) + else: + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompt_strings, verbose=self.verbose + ) + try: + output = await self.agenerate(prompt_messages, stop=stop) + except (KeyboardInterrupt, Exception) as e: + if self.callback_manager.is_async: + await self.callback_manager.on_llm_error(e, verbose=self.verbose) + else: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + if self.callback_manager.is_async: + await self.callback_manager.on_llm_end(output, verbose=self.verbose) + else: + self.callback_manager.on_llm_end(output, verbose=self.verbose) + return output + + @abstractmethod + def _generate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + """Top Level call""" + + @abstractmethod + async def _agenerate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + """Top Level call""" + + def __call__( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> BaseMessage: + return self._generate(messages, stop=stop).generations[0].message + + def call_as_llm(self, message: str, stop: Optional[List[str]] = None) -> str: + result = self([HumanMessage(content=message)], stop=stop) + return result.content + + +class SimpleChatModel(BaseChatModel): + def _generate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + output_str = self._call(messages, stop=stop) + message = AIMessage(content=output_str) + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + @abstractmethod + def 
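A sketch of the two calling conventions `BaseChatModel` exposes: `__call__` for a single message list, and `generate` for a batch, whose `llm_output` aggregates token usage via `_combine_llm_outputs` (assumes an OpenAI key):

.. code-block:: python

    from langchain.chat_models import ChatOpenAI
    from langchain.schema import HumanMessage, SystemMessage

    chat = ChatOpenAI(temperature=0)

    # Single conversation: returns one AIMessage.
    reply = chat([
        SystemMessage(content="You are a terse assistant."),
        HumanMessage(content="Name one prime number."),
    ])
    print(reply.content)

    # Batch of conversations: returns an LLMResult with aggregated token usage.
    result = chat.generate([
        [HumanMessage(content="Say hi.")],
        [HumanMessage(content="Say bye.")],
    ])
    print(result.llm_output["token_usage"])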
_call( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> str: + """Simpler interface.""" diff --git a/langchain/chat_models/openai.py b/langchain/chat_models/openai.py new file mode 100644 index 0000000000000000000000000000000000000000..24d19d7a048a617ce3cf2fe3d5d4a76657653453 --- /dev/null +++ b/langchain/chat_models/openai.py @@ -0,0 +1,375 @@ +"""OpenAI chat wrapper.""" +from __future__ import annotations + +import logging +import sys +from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple + +from pydantic import BaseModel, Extra, Field, root_validator +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from langchain.chat_models.base import BaseChatModel +from langchain.schema import ( + AIMessage, + BaseMessage, + ChatGeneration, + ChatMessage, + ChatResult, + HumanMessage, + SystemMessage, +) +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__file__) + + +def _create_retry_decorator(llm: ChatOpenAI) -> Callable[[Any], Any]: + import openai + + min_seconds = 4 + max_seconds = 10 + # Wait 2^x * 1 second between each retry starting with + # 4 seconds, then up to 10 seconds, then 10 seconds afterwards + return retry( + reraise=True, + stop=stop_after_attempt(llm.max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=( + retry_if_exception_type(openai.error.Timeout) + | retry_if_exception_type(openai.error.APIError) + | retry_if_exception_type(openai.error.APIConnectionError) + | retry_if_exception_type(openai.error.RateLimitError) + | retry_if_exception_type(openai.error.ServiceUnavailableError) + ), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + + +async def acompletion_with_retry(llm: ChatOpenAI, **kwargs: Any) -> Any: + """Use tenacity to retry the async completion call.""" + retry_decorator = _create_retry_decorator(llm) + + @retry_decorator + async def _completion_with_retry(**kwargs: Any) -> Any: + # Use OpenAI's async api https://github.com/openai/openai-python#async-api + return await llm.client.acreate(**kwargs) + + return await _completion_with_retry(**kwargs) + + +def _convert_dict_to_message(_dict: dict) -> BaseMessage: + role = _dict["role"] + if role == "user": + return HumanMessage(content=_dict["content"]) + elif role == "assistant": + return AIMessage(content=_dict["content"]) + elif role == "system": + return SystemMessage(content=_dict["content"]) + else: + return ChatMessage(content=_dict["content"], role=role) + + +def _convert_message_to_dict(message: BaseMessage) -> dict: + if isinstance(message, ChatMessage): + message_dict = {"role": message.role, "content": message.content} + elif isinstance(message, HumanMessage): + message_dict = {"role": "user", "content": message.content} + elif isinstance(message, AIMessage): + message_dict = {"role": "assistant", "content": message.content} + elif isinstance(message, SystemMessage): + message_dict = {"role": "system", "content": message.content} + else: + raise ValueError(f"Got unknown type {message}") + if "name" in message.additional_kwargs: + message_dict["name"] = message.additional_kwargs["name"] + return message_dict + + +def _create_chat_result(response: Mapping[str, Any]) -> ChatResult: + generations = [] + for res in response["choices"]: + message = _convert_dict_to_message(res["message"]) + gen = ChatGeneration(message=message) + generations.append(gen) + llm_output = {"token_usage": response["usage"]} + 
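`SimpleChatModel` lets a subclass implement just a string-in/string-out `_call`; a toy sketch of a custom model, with the async side simply reusing the sync path:

.. code-block:: python

    from typing import List, Optional

    from langchain.chat_models.base import SimpleChatModel
    from langchain.schema import BaseMessage, ChatResult, HumanMessage


    class EchoChatModel(SimpleChatModel):
        """Toy model that repeats the last message back; no external API involved."""

        def _call(
            self, messages: List[BaseMessage], stop: Optional[List[str]] = None
        ) -> str:
            return f"You said: {messages[-1].content}"

        async def _agenerate(
            self, messages: List[BaseMessage], stop: Optional[List[str]] = None
        ) -> ChatResult:
            return self._generate(messages, stop=stop)


    print(EchoChatModel()([HumanMessage(content="hello")]).content)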
return ChatResult(generations=generations, llm_output=llm_output) + + +class ChatOpenAI(BaseChatModel, BaseModel): + """Wrapper around OpenAI Chat large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + + from langchain.chat_models import ChatOpenAI + openai = ChatOpenAI(model_name="gpt-3.5-turbo") + """ + + client: Any #: :meta private: + model_name: str = "gpt-3.5-turbo" + """Model name to use.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + openai_api_key: Optional[str] = None + request_timeout: int = 60 + """Timeout in seconds for the OpenAPI request.""" + max_retries: int = 6 + """Maximum number of retries to make when generating.""" + streaming: bool = False + """Whether to stream the results or not.""" + n: int = 1 + """Number of chat completions to generate for each prompt.""" + max_tokens: Optional[int] = None + """Maximum number of tokens to generate.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.ignore + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + try: + values["client"] = openai.ChatCompletion + except AttributeError: + raise ValueError( + "`openai` has no `ChatCompletion` attribute, this is likely " + "due to an old version of the openai package. Try upgrading it " + "with `pip install --upgrade openai`." 
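In this snapshot `ChatOpenAI` declares only a handful of fields, so `build_extra` sweeps any other constructor kwargs into `model_kwargs` and forwards them to the OpenAI `create` call; a small sketch:

.. code-block:: python

    from langchain.chat_models import ChatOpenAI

    chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, presence_penalty=0.5)
    # Neither temperature nor presence_penalty is a declared field here, so both
    # land in model_kwargs and are passed through to openai.ChatCompletion.create.
    print(chat.model_kwargs)   # {'temperature': 0.2, 'presence_penalty': 0.5}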
+ ) + if values["n"] < 1: + raise ValueError("n must be at least 1.") + if values["n"] > 1 and values["streaming"]: + raise ValueError("n must be 1 when streaming.") + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + return { + "model": self.model_name, + "request_timeout": self.request_timeout, + "max_tokens": self.max_tokens, + "stream": self.streaming, + "n": self.n, + **self.model_kwargs, + } + + def _create_retry_decorator(self) -> Callable[[Any], Any]: + import openai + + min_seconds = 4 + max_seconds = 10 + # Wait 2^x * 1 second between each retry starting with + # 4 seconds, then up to 10 seconds, then 10 seconds afterwards + return retry( + reraise=True, + stop=stop_after_attempt(self.max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=( + retry_if_exception_type(openai.error.Timeout) + | retry_if_exception_type(openai.error.APIError) + | retry_if_exception_type(openai.error.APIConnectionError) + | retry_if_exception_type(openai.error.RateLimitError) + | retry_if_exception_type(openai.error.ServiceUnavailableError) + ), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + + def completion_with_retry(self, **kwargs: Any) -> Any: + """Use tenacity to retry the completion call.""" + retry_decorator = self._create_retry_decorator() + + @retry_decorator + def _completion_with_retry(**kwargs: Any) -> Any: + return self.client.create(**kwargs) + + return _completion_with_retry(**kwargs) + + def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict: + overall_token_usage: dict = {} + for output in llm_outputs: + if output is None: + # Happens in streaming + continue + token_usage = output["token_usage"] + for k, v in token_usage.items(): + if k in overall_token_usage: + overall_token_usage[k] += v + else: + overall_token_usage[k] = v + return {"token_usage": overall_token_usage} + + def _generate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + message_dicts, params = self._create_message_dicts(messages, stop) + if self.streaming: + inner_completion = "" + role = "assistant" + params["stream"] = True + for stream_resp in self.completion_with_retry( + messages=message_dicts, **params + ): + role = stream_resp["choices"][0]["delta"].get("role", role) + token = stream_resp["choices"][0]["delta"].get("content", "") + inner_completion += token + self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + message = _convert_dict_to_message( + {"content": inner_completion, "role": role} + ) + return ChatResult(generations=[ChatGeneration(message=message)]) + response = self.completion_with_retry(messages=message_dicts, **params) + return _create_chat_result(response) + + def _create_message_dicts( + self, messages: List[BaseMessage], stop: Optional[List[str]] + ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]: + params: Dict[str, Any] = {**{"model": self.model_name}, **self._default_params} + if stop is not None: + if "stop" in params: + raise ValueError("`stop` found in both the input and default params.") + params["stop"] = stop + message_dicts = [_convert_message_to_dict(m) for m in messages] + return message_dicts, params + + async def _agenerate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + message_dicts, params = self._create_message_dicts(messages, stop) + if self.streaming: + inner_completion = "" + role = "assistant" + 
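When `streaming=True`, `_generate` feeds each delta through `callback_manager.on_llm_new_token`; a sketch that prints tokens as they arrive, assuming the `CallbackManager` and `StreamingStdOutCallbackHandler` classes from the upstream callbacks package and an OpenAI key:

.. code-block:: python

    from langchain.callbacks.base import CallbackManager
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
    from langchain.chat_models import ChatOpenAI
    from langchain.schema import HumanMessage

    chat = ChatOpenAI(
        streaming=True,
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
        verbose=True,   # on_llm_new_token is forwarded with this verbose flag
        temperature=0,
    )
    chat([HumanMessage(content="Write a haiku about retries.")])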
params["stream"] = True + async for stream_resp in await acompletion_with_retry( + self, messages=message_dicts, **params + ): + role = stream_resp["choices"][0]["delta"].get("role", role) + token = stream_resp["choices"][0]["delta"].get("content", "") + inner_completion += token + if self.callback_manager.is_async: + await self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + else: + self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + message = _convert_dict_to_message( + {"content": inner_completion, "role": role} + ) + return ChatResult(generations=[ChatGeneration(message=message)]) + else: + response = await acompletion_with_retry( + self, messages=message_dicts, **params + ) + return _create_chat_result(response) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + def get_num_tokens(self, text: str) -> int: + """Calculate num tokens with tiktoken package.""" + # tiktoken NOT supported for Python 3.8 or below + if sys.version_info[1] <= 8: + return super().get_num_tokens(text) + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to calculate get_num_tokens. " + "Please it install it with `pip install tiktoken`." + ) + # create a GPT-3.5-Turbo encoder instance + enc = tiktoken.encoding_for_model(self.model_name) + + # encode the text using the GPT-3.5-Turbo encoder + tokenized_text = enc.encode(text) + + # calculate the number of tokens in the encoded text + return len(tokenized_text) + + def get_num_tokens_from_messages( + self, messages: List[BaseMessage], model: str = "gpt-3.5-turbo-0301" + ) -> int: + """Calculate num tokens for gpt-3.5-turbo with tiktoken package.""" + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to calculate get_num_tokens. " + "Please it install it with `pip install tiktoken`." + ) + + """Returns the number of tokens used by a list of messages.""" + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + encoding = tiktoken.get_encoding("cl100k_base") + if model == "gpt-3.5-turbo-0301": # note: future models may deviate from this + num_tokens = 0 + messages_dict = [_convert_message_to_dict(m) for m in messages] + for message in messages_dict: + # every message follows {role/name}\n{content}\n + num_tokens += 4 + for key, value in message.items(): + num_tokens += len(encoding.encode(value)) + if key == "name": # if there's a name, the role is omitted + num_tokens += -1 # role is always required and always 1 token + num_tokens += 2 # every reply is primed with assistant + return num_tokens + else: + raise NotImplementedError( + f"get_num_tokens_from_messages() is not presently implemented " + f"for model {model}." + "See https://github.com/openai/openai-python/blob/main/chatml.md for " + "information on how messages are converted to tokens." 
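A sketch of the token-counting helpers, which require the optional `tiktoken` package and, for the message variant, currently assume the `gpt-3.5-turbo-0301` message format:

.. code-block:: python

    from langchain.chat_models import ChatOpenAI
    from langchain.schema import HumanMessage, SystemMessage

    chat = ChatOpenAI(model_name="gpt-3.5-turbo")
    messages = [
        SystemMessage(content="You are helpful."),
        HumanMessage(content="How many tokens is this?"),
    ]
    print(chat.get_num_tokens("How many tokens is this?"))
    print(chat.get_num_tokens_from_messages(messages))  # adds per-message framing overhead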
+ ) diff --git a/langchain/chat_models/promptlayer_openai.py b/langchain/chat_models/promptlayer_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..a42acbe2412dd0d7006ae25e8873f3aef84b1cfe --- /dev/null +++ b/langchain/chat_models/promptlayer_openai.py @@ -0,0 +1,103 @@ +"""PromptLayer wrapper.""" +import datetime +from typing import List, Optional + +from pydantic import BaseModel + +from langchain.chat_models import ChatOpenAI +from langchain.schema import BaseMessage, ChatResult + + +class PromptLayerChatOpenAI(ChatOpenAI, BaseModel): + """Wrapper around OpenAI Chat large language models and PromptLayer. + + To use, you should have the ``openai`` and ``promptlayer`` python + package installed, and the environment variable ``OPENAI_API_KEY`` + and ``PROMPTLAYER_API_KEY`` set with your openAI API key and + promptlayer key respectively. + + All parameters that can be passed to the OpenAI LLM can also + be passed here. The PromptLayerChatOpenAI adds to optional + parameters: + ``pl_tags``: List of strings to tag the request with. + ``return_pl_id``: If True, the PromptLayer request ID will be + returned in the ``generation_info`` field of the + ``Generation`` object. + + Example: + .. code-block:: python + + from langchain.chat_models import PromptLayerChatOpenAI + openai = PromptLayerChatOpenAI(model_name="gpt-3.5-turbo") + """ + + pl_tags: Optional[List[str]] + return_pl_id: Optional[bool] = False + + def _generate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + """Call ChatOpenAI generate and then call PromptLayer API to log the request.""" + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = super()._generate(messages, stop) + request_end_time = datetime.datetime.now().timestamp() + message_dicts, params = super()._create_message_dicts(messages, stop) + for i, generation in enumerate(generated_responses.generations): + response_dict, params = super()._create_message_dicts( + [generation.message], stop + ) + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerChatOpenAI", + "langchain", + message_dicts, + params, + self.pl_tags, + response_dict, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses + + async def _agenerate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + """Call ChatOpenAI agenerate and then call PromptLayer to log.""" + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = await super()._agenerate(messages, stop) + request_end_time = datetime.datetime.now().timestamp() + message_dicts, params = super()._create_message_dicts(messages, stop) + for i, generation in enumerate(generated_responses.generations): + response_dict, params = super()._create_message_dicts( + [generation.message], stop + ) + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerChatOpenAI.async", + "langchain", + message_dicts, + params, + self.pl_tags, + response_dict, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if 
self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses diff --git a/langchain/docker-compose.yaml b/langchain/docker-compose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1558cdb864b9d1d763202d22b458c5c52a078d9 --- /dev/null +++ b/langchain/docker-compose.yaml @@ -0,0 +1,29 @@ +version: '3' +services: + langchain-frontend: + image: notlangchain/langchainplus-frontend:latest + ports: + - 4173:4173 + environment: + - BACKEND_URL=http://langchain-backend:8000 + - PUBLIC_BASE_URL=http://localhost:8000 + - PUBLIC_DEV_MODE=true + depends_on: + - langchain-backend + langchain-backend: + image: notlangchain/langchainplus:latest + environment: + - PORT=8000 + - LANGCHAIN_ENV=local + ports: + - 8000:8000 + depends_on: + - langchain-db + langchain-db: + image: postgres:14.1 + environment: + - POSTGRES_PASSWORD=postgres + - POSTGRES_USER=postgres + - POSTGRES_DB=postgres + ports: + - 5432:5432 diff --git a/langchain/docstore/__init__.py b/langchain/docstore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5e20de3db79384148c61b0435edaf4a9295213 --- /dev/null +++ b/langchain/docstore/__init__.py @@ -0,0 +1,5 @@ +"""Wrappers on top of docstores.""" +from langchain.docstore.in_memory import InMemoryDocstore +from langchain.docstore.wikipedia import Wikipedia + +__all__ = ["InMemoryDocstore", "Wikipedia"] diff --git a/langchain/docstore/__pycache__/__init__.cpython-39.pyc b/langchain/docstore/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95c863f8c615467ba911ecb701ae6fdf5653f756 Binary files /dev/null and b/langchain/docstore/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/docstore/__pycache__/base.cpython-39.pyc b/langchain/docstore/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..672e1d050012f6ca83bd3a7dd96497a4454dbfcb Binary files /dev/null and b/langchain/docstore/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/docstore/__pycache__/document.cpython-39.pyc b/langchain/docstore/__pycache__/document.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cce4d57e13c40ca3ad755e93c22dd3fcb6fb4399 Binary files /dev/null and b/langchain/docstore/__pycache__/document.cpython-39.pyc differ diff --git a/langchain/docstore/__pycache__/in_memory.cpython-39.pyc b/langchain/docstore/__pycache__/in_memory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4da9abff173ab82adff89022dbe1fe51357af3de Binary files /dev/null and b/langchain/docstore/__pycache__/in_memory.cpython-39.pyc differ diff --git a/langchain/docstore/__pycache__/wikipedia.cpython-39.pyc b/langchain/docstore/__pycache__/wikipedia.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7fdebaa193ad59645e39e2fbad076d49ec4b8094 Binary files /dev/null and b/langchain/docstore/__pycache__/wikipedia.cpython-39.pyc differ diff --git a/langchain/docstore/base.py b/langchain/docstore/base.py new file mode 100644 index 0000000000000000000000000000000000000000..4a91680c7327e2e61d760dc60dcaaaf974cc0310 --- /dev/null +++ b/langchain/docstore/base.py @@ -0,0 +1,25 @@ +"""Interface to access to place that stores documents.""" +from abc import ABC, abstractmethod +from typing import Dict, 
Union + +from langchain.docstore.document import Document + + +class Docstore(ABC): + """Interface to access to place that stores documents.""" + + @abstractmethod + def search(self, search: str) -> Union[str, Document]: + """Search for document. + + If page exists, return the page summary, and a Document object. + If page does not exist, return similar entries. + """ + + +class AddableMixin(ABC): + """Mixin class that supports adding texts.""" + + @abstractmethod + def add(self, texts: Dict[str, Document]) -> None: + """Add more documents.""" diff --git a/langchain/docstore/document.py b/langchain/docstore/document.py new file mode 100644 index 0000000000000000000000000000000000000000..1c33318db283390dca2bb62a8cac08a633fd5482 --- /dev/null +++ b/langchain/docstore/document.py @@ -0,0 +1,3 @@ +from langchain.schema import Document + +__all__ = ["Document"] diff --git a/langchain/docstore/in_memory.py b/langchain/docstore/in_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..f1e361025817c7e2df543cda518acba760ecbfb9 --- /dev/null +++ b/langchain/docstore/in_memory.py @@ -0,0 +1,27 @@ +"""Simple in memory docstore in the form of a dict.""" +from typing import Dict, Union + +from langchain.docstore.base import AddableMixin, Docstore +from langchain.docstore.document import Document + + +class InMemoryDocstore(Docstore, AddableMixin): + """Simple in memory docstore in the form of a dict.""" + + def __init__(self, _dict: Dict[str, Document]): + """Initialize with dict.""" + self._dict = _dict + + def add(self, texts: Dict[str, Document]) -> None: + """Add texts to in memory dictionary.""" + overlapping = set(texts).intersection(self._dict) + if overlapping: + raise ValueError(f"Tried to add ids that already exist: {overlapping}") + self._dict = dict(self._dict, **texts) + + def search(self, search: str) -> Union[str, Document]: + """Search via direct lookup.""" + if search not in self._dict: + return f"ID {search} not found." + else: + return self._dict[search] diff --git a/langchain/docstore/wikipedia.py b/langchain/docstore/wikipedia.py new file mode 100644 index 0000000000000000000000000000000000000000..8882fb23b68983e2050b32c66f79fdc03589f046 --- /dev/null +++ b/langchain/docstore/wikipedia.py @@ -0,0 +1,41 @@ +"""Wrapper around wikipedia API.""" + + +from typing import Union + +from langchain.docstore.base import Docstore +from langchain.docstore.document import Document + + +class Wikipedia(Docstore): + """Wrapper around wikipedia API.""" + + def __init__(self) -> None: + """Check that wikipedia package is installed.""" + try: + import wikipedia # noqa: F401 + except ImportError: + raise ValueError( + "Could not import wikipedia python package. " + "Please install it with `pip install wikipedia`." + ) + + def search(self, search: str) -> Union[str, Document]: + """Try to search for wiki page. + + If page exists, return the page summary, and a PageWithLookups object. + If page does not exist, return similar entries. + """ + import wikipedia + + try: + page_content = wikipedia.page(search).content + url = wikipedia.page(search).url + result: Union[str, Document] = Document( + page_content=page_content, metadata={"page": url} + ) + except wikipedia.PageError: + result = f"Could not find [{search}]. Similar: {wikipedia.search(search)}" + except wikipedia.DisambiguationError: + result = f"Could not find [{search}]. 
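A quick sketch of the docstore interface using the in-memory implementation; the `Wikipedia` docstore behaves the same way but needs the optional `wikipedia` package:

.. code-block:: python

    from langchain.docstore.in_memory import InMemoryDocstore
    from langchain.schema import Document

    store = InMemoryDocstore({"1": Document(page_content="first doc")})
    store.add({"2": Document(page_content="second doc")})  # duplicate ids raise ValueError
    print(store.search("2").page_content)   # direct lookup returns the Document
    print(store.search("missing"))          # unknown ids return "ID missing not found."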
Similar: {wikipedia.search(search)}" + return result diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fb4f22ff31120160aae98eaaa5be22376f6d4bce --- /dev/null +++ b/langchain/document_loaders/__init__.py @@ -0,0 +1,107 @@ +"""All different types of document loaders.""" + +from langchain.document_loaders.airbyte_json import AirbyteJSONLoader +from langchain.document_loaders.azlyrics import AZLyricsLoader +from langchain.document_loaders.blackboard import BlackboardLoader +from langchain.document_loaders.college_confidential import CollegeConfidentialLoader +from langchain.document_loaders.conllu import CoNLLULoader +from langchain.document_loaders.csv_loader import CSVLoader +from langchain.document_loaders.directory import DirectoryLoader +from langchain.document_loaders.email import UnstructuredEmailLoader +from langchain.document_loaders.evernote import EverNoteLoader +from langchain.document_loaders.facebook_chat import FacebookChatLoader +from langchain.document_loaders.gcs_directory import GCSDirectoryLoader +from langchain.document_loaders.gcs_file import GCSFileLoader +from langchain.document_loaders.gitbook import GitbookLoader +from langchain.document_loaders.googledrive import GoogleDriveLoader +from langchain.document_loaders.gutenberg import GutenbergLoader +from langchain.document_loaders.hn import HNLoader +from langchain.document_loaders.html import UnstructuredHTMLLoader +from langchain.document_loaders.html_bs import BSHTMLLoader +from langchain.document_loaders.ifixit import IFixitLoader +from langchain.document_loaders.image import UnstructuredImageLoader +from langchain.document_loaders.imsdb import IMSDbLoader +from langchain.document_loaders.markdown import UnstructuredMarkdownLoader +from langchain.document_loaders.notebook import NotebookLoader +from langchain.document_loaders.notion import NotionDirectoryLoader +from langchain.document_loaders.obsidian import ObsidianLoader +from langchain.document_loaders.pdf import ( + OnlinePDFLoader, + PDFMinerLoader, + PyMuPDFLoader, + PyPDFLoader, + UnstructuredPDFLoader, +) +from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader +from langchain.document_loaders.readthedocs import ReadTheDocsLoader +from langchain.document_loaders.roam import RoamLoader +from langchain.document_loaders.s3_directory import S3DirectoryLoader +from langchain.document_loaders.s3_file import S3FileLoader +from langchain.document_loaders.srt import SRTLoader +from langchain.document_loaders.telegram import TelegramChatLoader +from langchain.document_loaders.text import TextLoader +from langchain.document_loaders.unstructured import ( + UnstructuredFileIOLoader, + UnstructuredFileLoader, +) +from langchain.document_loaders.url import UnstructuredURLLoader +from langchain.document_loaders.web_base import WebBaseLoader +from langchain.document_loaders.word_document import UnstructuredWordDocumentLoader +from langchain.document_loaders.youtube import ( + GoogleApiClient, + GoogleApiYoutubeLoader, + YoutubeLoader, +) + +"""Legacy: only for backwards compat. 
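A minimal usage sketch for the docstore interfaces above (the IDs and texts are illustrative, and the Wikipedia lookup assumes the optional ``wikipedia`` package is installed):

.. code-block:: python

    from langchain.docstore import InMemoryDocstore, Wikipedia
    from langchain.schema import Document

    # Direct-lookup docstore backed by a plain dict.
    store = InMemoryDocstore({"doc-1": Document(page_content="hello world")})
    store.add({"doc-2": Document(page_content="second entry")})
    print(store.search("doc-1").page_content)  # "hello world"
    print(store.search("missing-id"))          # "ID missing-id not found."

    # Wikipedia-backed lookup: returns a Document, or a string of similar titles.
    wiki = Wikipedia()
    result = wiki.search("Python (programming language)")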
use PyPDFLoader instead""" +PagedPDFSplitter = PyPDFLoader + +__all__ = [ + "UnstructuredFileLoader", + "UnstructuredFileIOLoader", + "UnstructuredURLLoader", + "DirectoryLoader", + "NotionDirectoryLoader", + "ReadTheDocsLoader", + "GoogleDriveLoader", + "UnstructuredHTMLLoader", + "BSHTMLLoader", + "UnstructuredPowerPointLoader", + "UnstructuredWordDocumentLoader", + "UnstructuredPDFLoader", + "UnstructuredImageLoader", + "ObsidianLoader", + "UnstructuredEmailLoader", + "UnstructuredMarkdownLoader", + "RoamLoader", + "YoutubeLoader", + "S3FileLoader", + "TextLoader", + "HNLoader", + "GitbookLoader", + "S3DirectoryLoader", + "GCSFileLoader", + "GCSDirectoryLoader", + "WebBaseLoader", + "IMSDbLoader", + "AZLyricsLoader", + "CollegeConfidentialLoader", + "IFixitLoader", + "GutenbergLoader", + "PagedPDFSplitter", + "PyPDFLoader", + "EverNoteLoader", + "AirbyteJSONLoader", + "OnlinePDFLoader", + "PDFMinerLoader", + "PyMuPDFLoader", + "TelegramChatLoader", + "SRTLoader", + "FacebookChatLoader", + "NotebookLoader", + "CoNLLULoader", + "GoogleApiYoutubeLoader", + "GoogleApiClient", + "CSVLoader", + "BlackboardLoader", +] diff --git a/langchain/document_loaders/__pycache__/__init__.cpython-39.pyc b/langchain/document_loaders/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff28f16806786d52e06ff62e67231065454cf057 Binary files /dev/null and b/langchain/document_loaders/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/airbyte_json.cpython-39.pyc b/langchain/document_loaders/__pycache__/airbyte_json.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f78b205a4d8fe1827ccd645f1b3cd79fd20fe64 Binary files /dev/null and b/langchain/document_loaders/__pycache__/airbyte_json.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/azlyrics.cpython-39.pyc b/langchain/document_loaders/__pycache__/azlyrics.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccaefe35f432b8ba2bd08b23f9c059b5b3f57bdb Binary files /dev/null and b/langchain/document_loaders/__pycache__/azlyrics.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/base.cpython-39.pyc b/langchain/document_loaders/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d37595fc2f0e439fce2beba904fab07dd165041 Binary files /dev/null and b/langchain/document_loaders/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/blackboard.cpython-39.pyc b/langchain/document_loaders/__pycache__/blackboard.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1b7bc4c76b13fdcc44b94ef659079510b09ea88 Binary files /dev/null and b/langchain/document_loaders/__pycache__/blackboard.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/college_confidential.cpython-39.pyc b/langchain/document_loaders/__pycache__/college_confidential.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ec0723d9cc7a1c4559dd228a6dee88655472d71a Binary files /dev/null and b/langchain/document_loaders/__pycache__/college_confidential.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/conllu.cpython-39.pyc b/langchain/document_loaders/__pycache__/conllu.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..126fbe7bb950ce34ca7aa0242e88bafb523ff620 Binary files /dev/null and 
b/langchain/document_loaders/__pycache__/conllu.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/csv_loader.cpython-39.pyc b/langchain/document_loaders/__pycache__/csv_loader.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7269a9bd23ffb7a9790f8592aee878e7ef2b3bb Binary files /dev/null and b/langchain/document_loaders/__pycache__/csv_loader.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/directory.cpython-39.pyc b/langchain/document_loaders/__pycache__/directory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ada1da4b11e74448b5cb58af5a0dd09f238581b Binary files /dev/null and b/langchain/document_loaders/__pycache__/directory.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/docx.cpython-39.pyc b/langchain/document_loaders/__pycache__/docx.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef27571fffe24bd6578958f1f9045191b181ad5b Binary files /dev/null and b/langchain/document_loaders/__pycache__/docx.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/email.cpython-39.pyc b/langchain/document_loaders/__pycache__/email.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab5bb7c39ec79abf8fb60985b0b7b03fef5aa158 Binary files /dev/null and b/langchain/document_loaders/__pycache__/email.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/evernote.cpython-39.pyc b/langchain/document_loaders/__pycache__/evernote.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d75e55e27b9d5739addff9198fd46f298e0ab3f5 Binary files /dev/null and b/langchain/document_loaders/__pycache__/evernote.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/facebook_chat.cpython-39.pyc b/langchain/document_loaders/__pycache__/facebook_chat.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..070451810c7d3a93a679bc40aaeef0c3831bace2 Binary files /dev/null and b/langchain/document_loaders/__pycache__/facebook_chat.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/gcs_directory.cpython-39.pyc b/langchain/document_loaders/__pycache__/gcs_directory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd32f010532a3e9ac574ff1ff5f3609f10825418 Binary files /dev/null and b/langchain/document_loaders/__pycache__/gcs_directory.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/gcs_file.cpython-39.pyc b/langchain/document_loaders/__pycache__/gcs_file.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6457f944edd51080c7afa54c6a44d3d24a02d02f Binary files /dev/null and b/langchain/document_loaders/__pycache__/gcs_file.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/gitbook.cpython-39.pyc b/langchain/document_loaders/__pycache__/gitbook.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d90945d5c0597006ab65db614140fb387732790a Binary files /dev/null and b/langchain/document_loaders/__pycache__/gitbook.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/googledrive.cpython-39.pyc b/langchain/document_loaders/__pycache__/googledrive.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b81054b7867ae80221aa37cd76ed7e4e8f68ba2 Binary files /dev/null and 
b/langchain/document_loaders/__pycache__/googledrive.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/gutenberg.cpython-39.pyc b/langchain/document_loaders/__pycache__/gutenberg.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27f9cbc5cb1341eae5a22132f95c92b1a09c6425 Binary files /dev/null and b/langchain/document_loaders/__pycache__/gutenberg.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/hn.cpython-39.pyc b/langchain/document_loaders/__pycache__/hn.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05338021dac4393fa6fe33c1268ba2ec3aeeac6b Binary files /dev/null and b/langchain/document_loaders/__pycache__/hn.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/html.cpython-39.pyc b/langchain/document_loaders/__pycache__/html.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..019824fa164f9ff2ce482c787ec2ed27410f9473 Binary files /dev/null and b/langchain/document_loaders/__pycache__/html.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/html_bs.cpython-39.pyc b/langchain/document_loaders/__pycache__/html_bs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7cbc2e67be876018c58deebecb65679d637cd32a Binary files /dev/null and b/langchain/document_loaders/__pycache__/html_bs.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/ifixit.cpython-39.pyc b/langchain/document_loaders/__pycache__/ifixit.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1d9f628e5bc547833ec7dd367e0995486c528c0 Binary files /dev/null and b/langchain/document_loaders/__pycache__/ifixit.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/image.cpython-39.pyc b/langchain/document_loaders/__pycache__/image.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..801be4be9bb334f04036e6f3e3683be352fbc79f Binary files /dev/null and b/langchain/document_loaders/__pycache__/image.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/imsdb.cpython-39.pyc b/langchain/document_loaders/__pycache__/imsdb.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83557f435d8c1681997950299bbc2e525bb4f27f Binary files /dev/null and b/langchain/document_loaders/__pycache__/imsdb.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/markdown.cpython-39.pyc b/langchain/document_loaders/__pycache__/markdown.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb200f2c22153f1800f4e241546a704f92fb39a8 Binary files /dev/null and b/langchain/document_loaders/__pycache__/markdown.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/notebook.cpython-39.pyc b/langchain/document_loaders/__pycache__/notebook.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a3b66ff7386929a3cb9367bca9f1e9418eeb0bd Binary files /dev/null and b/langchain/document_loaders/__pycache__/notebook.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/notion.cpython-39.pyc b/langchain/document_loaders/__pycache__/notion.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f1e0f19496f8a9dd494ee1129c27736506a3177 Binary files /dev/null and b/langchain/document_loaders/__pycache__/notion.cpython-39.pyc differ diff --git 
a/langchain/document_loaders/__pycache__/obsidian.cpython-39.pyc b/langchain/document_loaders/__pycache__/obsidian.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3f3a31202d5ed47e5ffce3e3a30c28aae14afc5 Binary files /dev/null and b/langchain/document_loaders/__pycache__/obsidian.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/pdf.cpython-39.pyc b/langchain/document_loaders/__pycache__/pdf.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62911a22ba0afe37c061b48a8092b4db73c0cdc0 Binary files /dev/null and b/langchain/document_loaders/__pycache__/pdf.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/powerpoint.cpython-39.pyc b/langchain/document_loaders/__pycache__/powerpoint.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14e31a1d9aed044f611eec8ba667afade8dff996 Binary files /dev/null and b/langchain/document_loaders/__pycache__/powerpoint.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/readthedocs.cpython-39.pyc b/langchain/document_loaders/__pycache__/readthedocs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71e2b9a2c01ad8260ff6d3445332508427c20165 Binary files /dev/null and b/langchain/document_loaders/__pycache__/readthedocs.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/roam.cpython-39.pyc b/langchain/document_loaders/__pycache__/roam.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d5d8beddd4b8f08e0947f3ad0d2510ad2615a34 Binary files /dev/null and b/langchain/document_loaders/__pycache__/roam.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/s3_directory.cpython-39.pyc b/langchain/document_loaders/__pycache__/s3_directory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03e0f010817231cdafb1636344eb690d3f3ff5bd Binary files /dev/null and b/langchain/document_loaders/__pycache__/s3_directory.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/s3_file.cpython-39.pyc b/langchain/document_loaders/__pycache__/s3_file.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1340cf62f0c83ddfa2bbb5f79e2c81fa4e03c9c Binary files /dev/null and b/langchain/document_loaders/__pycache__/s3_file.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/srt.cpython-39.pyc b/langchain/document_loaders/__pycache__/srt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..834e426fd8df506b3b0c6ab1ab1c8e799ee46530 Binary files /dev/null and b/langchain/document_loaders/__pycache__/srt.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/telegram.cpython-39.pyc b/langchain/document_loaders/__pycache__/telegram.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..59263a7b4fef0e867745fd6d9c35791d4e388c4b Binary files /dev/null and b/langchain/document_loaders/__pycache__/telegram.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/text.cpython-39.pyc b/langchain/document_loaders/__pycache__/text.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d393e25687e67536e5828c789d26f3eb58028f02 Binary files /dev/null and b/langchain/document_loaders/__pycache__/text.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/unstructured.cpython-39.pyc 
b/langchain/document_loaders/__pycache__/unstructured.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d69960b85d28457e8b32309c90e2434ffb713c13 Binary files /dev/null and b/langchain/document_loaders/__pycache__/unstructured.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/url.cpython-39.pyc b/langchain/document_loaders/__pycache__/url.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6cbdd4ba4b281f9add6a29e9a0a673e72d27cb3 Binary files /dev/null and b/langchain/document_loaders/__pycache__/url.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/web_base.cpython-39.pyc b/langchain/document_loaders/__pycache__/web_base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc7320665118d9b9bccef27f0480e44dd1cceeaa Binary files /dev/null and b/langchain/document_loaders/__pycache__/web_base.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/word_document.cpython-39.pyc b/langchain/document_loaders/__pycache__/word_document.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10a58e3df7c03c561c75dbb375d9b7112d9e9cfb Binary files /dev/null and b/langchain/document_loaders/__pycache__/word_document.cpython-39.pyc differ diff --git a/langchain/document_loaders/__pycache__/youtube.cpython-39.pyc b/langchain/document_loaders/__pycache__/youtube.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc7a11a955e08ae6caafb0b976d1a930a6f72aa8 Binary files /dev/null and b/langchain/document_loaders/__pycache__/youtube.cpython-39.pyc differ diff --git a/langchain/document_loaders/airbyte_json.py b/langchain/document_loaders/airbyte_json.py new file mode 100644 index 0000000000000000000000000000000000000000..823267e6313191bc3650f7ae3f23418479497efe --- /dev/null +++ b/langchain/document_loaders/airbyte_json.py @@ -0,0 +1,41 @@ +"""Loader that loads local airbyte json files.""" +import json +from typing import Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def _stringify_value(val: Any) -> str: + if isinstance(val, str): + return val + elif isinstance(val, dict): + return "\n" + _stringify_dict(val) + elif isinstance(val, list): + return "\n".join(_stringify_value(v) for v in val) + else: + return str(val) + + +def _stringify_dict(data: dict) -> str: + text = "" + for key, value in data.items(): + text += key + ": " + _stringify_value(data[key]) + "\n" + return text + + +class AirbyteJSONLoader(BaseLoader): + """Loader that loads local airbyte json files.""" + + def __init__(self, file_path: str): + """Initialize with file path. 
This should start with '/tmp/airbyte_local/'.""" + self.file_path = file_path + + def load(self) -> List[Document]: + """Load file.""" + text = "" + for line in open(self.file_path, "r"): + data = json.loads(line)["_airbyte_data"] + text += _stringify_dict(data) + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/azlyrics.py b/langchain/document_loaders/azlyrics.py new file mode 100644 index 0000000000000000000000000000000000000000..0947946c1162174a6cebe0881a5f2821402e17be --- /dev/null +++ b/langchain/document_loaders/azlyrics.py @@ -0,0 +1,18 @@ +"""Loader that loads AZLyrics.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.web_base import WebBaseLoader + + +class AZLyricsLoader(WebBaseLoader): + """Loader that loads AZLyrics webpages.""" + + def load(self) -> List[Document]: + """Load webpage.""" + soup = self.scrape() + title = soup.title.text + lyrics = soup.find_all("div", {"class": ""})[2].text + text = title + lyrics + metadata = {"source": self.web_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/base.py b/langchain/document_loaders/base.py new file mode 100644 index 0000000000000000000000000000000000000000..d5784a747d64b9d9d216e8125cca60b4d37a20fd --- /dev/null +++ b/langchain/document_loaders/base.py @@ -0,0 +1,26 @@ +"""Base loader class.""" + +from abc import ABC, abstractmethod +from typing import List, Optional + +from langchain.docstore.document import Document +from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter + + +class BaseLoader(ABC): + """Base loader class.""" + + @abstractmethod + def load(self) -> List[Document]: + """Load data into document objects.""" + + def load_and_split( + self, text_splitter: Optional[TextSplitter] = None + ) -> List[Document]: + """Load documents and split into chunks.""" + if text_splitter is None: + _text_splitter: TextSplitter = RecursiveCharacterTextSplitter() + else: + _text_splitter = text_splitter + docs = self.load() + return _text_splitter.split_documents(docs) diff --git a/langchain/document_loaders/blackboard.py b/langchain/document_loaders/blackboard.py new file mode 100644 index 0000000000000000000000000000000000000000..ccddf19ed6d13bf54759caa0b90aabacb72c1718 --- /dev/null +++ b/langchain/document_loaders/blackboard.py @@ -0,0 +1,293 @@ +"""Loader that loads all documents from a blackboard course.""" +import contextlib +import re +from pathlib import Path +from typing import Any, List, Optional, Tuple +from urllib.parse import unquote + +from langchain.docstore.document import Document +from langchain.document_loaders.directory import DirectoryLoader +from langchain.document_loaders.pdf import PyPDFLoader +from langchain.document_loaders.web_base import WebBaseLoader + + +class BlackboardLoader(WebBaseLoader): + """Loader that loads all documents from a Blackboard course. + + This loader is not compatible with all Blackboard courses. It is only + compatible with courses that use the new Blackboard interface. + To use this loader, you must have the BbRouter cookie. You can get this + cookie by logging into the course and then copying the value of the + BbRouter cookie from the browser's developer tools. + + Example: + .. 
code-block:: python + + from langchain.document_loaders import BlackboardLoader + + loader = BlackboardLoader( + blackboard_course_url="https://blackboard.example.com/webapps/blackboard/execute/announcement?method=search&context=course_entry&course_id=_123456_1", + bbrouter="expires:12345...", + ) + documents = loader.load() + + """ + + base_url: str + folder_path: str + load_all_recursively: bool + + def __init__( + self, + blackboard_course_url: str, + bbrouter: str, + load_all_recursively: bool = True, + basic_auth: Optional[Tuple[str, str]] = None, + cookies: Optional[dict] = None, + ): + """Initialize with blackboard course url. + + The BbRouter cookie is required for most blackboard courses. + + Args: + blackboard_course_url: Blackboard course url. + bbrouter: BbRouter cookie. + load_all_recursively: If True, load all documents recursively. + basic_auth: Basic auth credentials. + cookies: Cookies. + + Raises: + ValueError: If blackboard course url is invalid. + """ + super().__init__(blackboard_course_url) + # Get base url + try: + self.base_url = blackboard_course_url.split("/webapps/blackboard")[0] + except IndexError: + raise ValueError( + "Invalid blackboard course url. " + "Please provide a url that starts with " + "https:///webapps/blackboard" + ) + if basic_auth is not None: + self.session.auth = basic_auth + # Combine cookies + if cookies is None: + cookies = {} + cookies.update({"BbRouter": bbrouter}) + self.session.cookies.update(cookies) + self.load_all_recursively = load_all_recursively + self.check_bs4() + + def check_bs4(self) -> None: + """Check if BeautifulSoup4 is installed. + + Raises: + ImportError: If BeautifulSoup4 is not installed. + """ + try: + import bs4 # noqa: F401 + except ImportError: + raise ImportError( + "BeautifulSoup4 is required for BlackboardLoader. " + "Please install it with `pip install beautifulsoup4`." + ) + + def load(self) -> List[Document]: + """Load data into document objects. + + Returns: + List of documents. + """ + if self.load_all_recursively: + soup_info = self.scrape() + self.folder_path = self._get_folder_path(soup_info) + relative_paths = self._get_paths(soup_info) + documents = [] + for path in relative_paths: + url = self.base_url + path + print(f"Fetching documents from {url}") + soup_info = self._scrape(url) + with contextlib.suppress(ValueError): + documents.extend(self._get_documents(soup_info)) + return documents + else: + print(f"Fetching documents from {self.web_path}") + soup_info = self.scrape() + self.folder_path = self._get_folder_path(soup_info) + return self._get_documents(soup_info) + + def _get_folder_path(self, soup: Any) -> str: + """Get the folder path to save the documents in. + + Args: + soup: BeautifulSoup4 soup object. + + Returns: + Folder path. + """ + # Get the course name + course_name = soup.find("span", {"id": "crumb_1"}) + if course_name is None: + raise ValueError("No course name found.") + course_name = course_name.text.strip() + # Prepare the folder path + course_name_clean = ( + unquote(course_name) + .replace(" ", "_") + .replace("/", "_") + .replace(":", "_") + .replace(",", "_") + .replace("?", "_") + .replace("'", "_") + .replace("!", "_") + .replace('"', "_") + ) + # Get the folder path + folder_path = Path(".") / course_name_clean + return str(folder_path) + + def _get_documents(self, soup: Any) -> List[Document]: + """Fetch content from page and return Documents. + + Args: + soup: BeautifulSoup4 soup object. + + Returns: + List of documents. 
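As a brief aside on the ``BaseLoader.load_and_split`` helper shown earlier, a minimal sketch (the file path is illustrative):

.. code-block:: python

    from langchain.document_loaders import TextLoader

    # Any BaseLoader subclass inherits load_and_split(); when no splitter is
    # passed, a RecursiveCharacterTextSplitter with default settings is used.
    loader = TextLoader("notes.txt")  # illustrative path
    chunks = loader.load_and_split()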
+ """ + attachments = self._get_attachments(soup) + self._download_attachments(attachments) + documents = self._load_documents() + return documents + + def _get_attachments(self, soup: Any) -> List[str]: + """Get all attachments from a page. + + Args: + soup: BeautifulSoup4 soup object. + + Returns: + List of attachments. + """ + from bs4 import BeautifulSoup, Tag + + # Get content list + content_list = soup.find("ul", {"class": "contentList"}) + if content_list is None: + raise ValueError("No content list found.") + content_list: BeautifulSoup # type: ignore + # Get all attachments + attachments = [] + for attachment in content_list.find_all("ul", {"class": "attachments"}): + attachment: Tag # type: ignore + for link in attachment.find_all("a"): + link: Tag # type: ignore + href = link.get("href") + # Only add if href is not None and does not start with # + if href is not None and not href.startswith("#"): + attachments.append(href) + return attachments + + def _download_attachments(self, attachments: List[str]) -> None: + """Download all attachments. + + Args: + attachments: List of attachments. + """ + # Make sure the folder exists + Path(self.folder_path).mkdir(parents=True, exist_ok=True) + # Download all attachments + for attachment in attachments: + self.download(attachment) + + def _load_documents(self) -> List[Document]: + """Load all documents in the folder. + + Returns: + List of documents. + """ + # Create the document loader + loader = DirectoryLoader( + path=self.folder_path, glob="*.pdf", loader_cls=PyPDFLoader # type: ignore + ) + # Load the documents + documents = loader.load() + # Return all documents + return documents + + def _get_paths(self, soup: Any) -> List[str]: + """Get all relative paths in the navbar.""" + relative_paths = [] + course_menu = soup.find("ul", {"class": "courseMenu"}) + if course_menu is None: + raise ValueError("No course menu found.") + for link in course_menu.find_all("a"): + href = link.get("href") + if href is not None and href.startswith("/"): + relative_paths.append(href) + return relative_paths + + def download(self, path: str) -> None: + """Download a file from a url. + + Args: + path: Path to the file. + """ + # Get the file content + response = self.session.get(self.base_url + path, allow_redirects=True) + # Get the filename + filename = self.parse_filename(response.url) + # Write the file to disk + with open(Path(self.folder_path) / filename, "wb") as f: + f.write(response.content) + + def parse_filename(self, url: str) -> str: + """Parse the filename from a url. + + Args: + url: Url to parse the filename from. + + Returns: + The filename. + """ + if (url_path := Path(url)) and url_path.suffix == ".pdf": + return url_path.name + else: + return self._parse_filename_from_url(url) + + def _parse_filename_from_url(self, url: str) -> str: + """Parse the filename from a url. + + Args: + url: Url to parse the filename from. + + Returns: + The filename. + + Raises: + ValueError: If the filename could not be parsed. 
+ """ + filename_matches = re.search(r"filename%2A%3DUTF-8%27%27(.+)", url) + if filename_matches: + filename = filename_matches.group(1) + else: + raise ValueError(f"Could not parse filename from {url}") + if ".pdf" not in filename: + raise ValueError(f"Incorrect file type: {filename}") + filename = filename.split(".pdf")[0] + ".pdf" + filename = unquote(filename) + filename = filename.replace("%20", " ") + return filename + + +if __name__ == "__main__": + loader = BlackboardLoader( + "https:///webapps/blackboard/content/listContent.jsp?course_id=__1&content_id=__1&mode=reset", + "", + load_all_recursively=True, + ) + documents = loader.load() + print(f"Loaded {len(documents)} pages of PDFs from {loader.web_path}") diff --git a/langchain/document_loaders/college_confidential.py b/langchain/document_loaders/college_confidential.py new file mode 100644 index 0000000000000000000000000000000000000000..1eaa64bcb27cd8983ead28d43d9477c08c7d545a --- /dev/null +++ b/langchain/document_loaders/college_confidential.py @@ -0,0 +1,16 @@ +"""Loader that loads College Confidential.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.web_base import WebBaseLoader + + +class CollegeConfidentialLoader(WebBaseLoader): + """Loader that loads College Confidential webpages.""" + + def load(self) -> List[Document]: + """Load webpage.""" + soup = self.scrape() + text = soup.select_one("main[class='skin-handler']").text + metadata = {"source": self.web_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/conllu.py b/langchain/document_loaders/conllu.py new file mode 100644 index 0000000000000000000000000000000000000000..82fbeb5b2dc3569511d727dd2c59f9dc394580bf --- /dev/null +++ b/langchain/document_loaders/conllu.py @@ -0,0 +1,33 @@ +"""Load CoNLL-U files.""" +import csv +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class CoNLLULoader(BaseLoader): + """Load CoNLL-U files.""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + self.file_path = file_path + + def load(self) -> List[Document]: + """Load from file path.""" + with open(self.file_path, encoding="utf8") as f: + tsv = list(csv.reader(f, delimiter="\t")) + + # If len(line) > 1, the line is not a comment + lines = [line for line in tsv if len(line) > 1] + + text = "" + for i, line in enumerate(lines): + # Do not add a space after a punctuation mark or at the end of the sentence + if line[9] == "SpaceAfter=No" or i == len(lines) - 1: + text += line[1] + else: + text += line[1] + " " + + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/csv_loader.py b/langchain/document_loaders/csv_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..9911f60515d2a42e41266e37be2cedf01ed09ced --- /dev/null +++ b/langchain/document_loaders/csv_loader.py @@ -0,0 +1,62 @@ +from csv import DictReader +from typing import Dict, List, Optional + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class CSVLoader(BaseLoader): + """Loads a CSV file into a list of documents. + + Each document represents one row of the CSV file. Every row is converted into a + key/value pair and outputted to a new line in the document's page_content. 
+ + The source for each document loaded from csv is set to the value of the + `file_path` argument for all doucments by default. + You can override this by setting the `source_column` argument to the + name of a column in the CSV file. + The source of each document will then be set to the value of the column + with the name specified in `source_column`. + + Output Example: + .. code-block:: txt + + column1: value1 + column2: value2 + column3: value3 + """ + + def __init__( + self, + file_path: str, + source_column: Optional[str] = None, + csv_args: Optional[Dict] = None, + encoding: Optional[str] = None, + ): + self.file_path = file_path + self.source_column = source_column + self.encoding = encoding + if csv_args is None: + self.csv_args = { + "delimiter": ",", + "quotechar": '"', + } + else: + self.csv_args = csv_args + + def load(self) -> List[Document]: + docs = [] + + with open(self.file_path, newline="", encoding=self.encoding) as csvfile: + csv = DictReader(csvfile, **self.csv_args) # type: ignore + for i, row in enumerate(csv): + content = "\n".join(f"{k.strip()}: {v.strip()}" for k, v in row.items()) + if self.source_column is not None: + source = row[self.source_column] + else: + source = self.file_path + metadata = {"source": source, "row": i} + doc = Document(page_content=content, metadata=metadata) + docs.append(doc) + + return docs diff --git a/langchain/document_loaders/directory.py b/langchain/document_loaders/directory.py new file mode 100644 index 0000000000000000000000000000000000000000..ecffa85eb5af5899e1c3ee2e3bec8e1e789ee634 --- /dev/null +++ b/langchain/document_loaders/directory.py @@ -0,0 +1,62 @@ +"""Loading logic for loading documents from a directory.""" +import logging +from pathlib import Path +from typing import List, Type, Union + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.html_bs import BSHTMLLoader +from langchain.document_loaders.text import TextLoader +from langchain.document_loaders.unstructured import UnstructuredFileLoader + +FILE_LOADER_TYPE = Union[ + Type[UnstructuredFileLoader], Type[TextLoader], Type[BSHTMLLoader] +] +logger = logging.getLogger(__file__) + + +def _is_visible(p: Path) -> bool: + parts = p.parts + for _p in parts: + if _p.startswith("."): + return False + return True + + +class DirectoryLoader(BaseLoader): + """Loading logic for loading documents from a directory.""" + + def __init__( + self, + path: str, + glob: str = "**/[!.]*", + silent_errors: bool = False, + load_hidden: bool = False, + loader_cls: FILE_LOADER_TYPE = UnstructuredFileLoader, + recursive: bool = False, + ): + """Initialize with path to directory and how to glob over it.""" + self.path = path + self.glob = glob + self.load_hidden = load_hidden + self.loader_cls = loader_cls + self.silent_errors = silent_errors + self.recursive = recursive + + def load(self) -> List[Document]: + """Load documents.""" + p = Path(self.path) + docs = [] + items = p.rglob(self.glob) if self.recursive else p.glob(self.glob) + for i in items: + if i.is_file(): + if _is_visible(i.relative_to(p)) or self.load_hidden: + try: + sub_docs = self.loader_cls(str(i)).load() + docs.extend(sub_docs) + except Exception as e: + if self.silent_errors: + logger.warning(e) + else: + raise e + return docs diff --git a/langchain/document_loaders/email.py b/langchain/document_loaders/email.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3ecd8845714dea66f95f83685b8082b08cf2d1 --- 
/dev/null +++ b/langchain/document_loaders/email.py @@ -0,0 +1,13 @@ +"""Loader that loads email files.""" +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredEmailLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load email files.""" + + def _get_elements(self) -> List: + from unstructured.partition.email import partition_email + + return partition_email(filename=self.file_path) diff --git a/langchain/document_loaders/evernote.py b/langchain/document_loaders/evernote.py new file mode 100644 index 0000000000000000000000000000000000000000..a7529f379faba66374ac05626608876a35e85c9d --- /dev/null +++ b/langchain/document_loaders/evernote.py @@ -0,0 +1,82 @@ +"""Load documents from Evernote. + +https://gist.github.com/foxmask/7b29c43a161e001ff04afdb2f181e31c +""" +import hashlib +from base64 import b64decode +from time import strptime +from typing import Any, Dict, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def _parse_content(content: str) -> str: + from pypandoc import convert_text + + text = convert_text(content, "org", format="html") + return text + + +def _parse_resource(resource: list) -> dict: + rsc_dict: Dict[str, Any] = {} + for elem in resource: + if elem.tag == "data": + # Some times elem.text is None + rsc_dict[elem.tag] = b64decode(elem.text) if elem.text else b"" + rsc_dict["hash"] = hashlib.md5(rsc_dict[elem.tag]).hexdigest() + else: + rsc_dict[elem.tag] = elem.text + + return rsc_dict + + +def _parse_note(note: List) -> dict: + note_dict: Dict[str, Any] = {} + resources = [] + for elem in note: + if elem.tag == "content": + note_dict[elem.tag] = _parse_content(elem.text) + # A copy of original content + note_dict["content-raw"] = elem.text + elif elem.tag == "resource": + resources.append(_parse_resource(elem)) + elif elem.tag == "created" or elem.tag == "updated": + note_dict[elem.tag] = strptime(elem.text, "%Y%m%dT%H%M%SZ") + else: + note_dict[elem.tag] = elem.text + + note_dict["resource"] = resources + + return note_dict + + +def _parse_note_xml(xml_file: str) -> str: + """Parse Evernote xml.""" + # Without huge_tree set to True, parser may complain about huge text node + # Try to recover, because there may be " ", which will cause + # "XMLSyntaxError: Entity 'nbsp' not defined" + from lxml import etree + + context = etree.iterparse( + xml_file, encoding="utf-8", strip_cdata=False, huge_tree=True, recover=True + ) + result_string = "" + for action, elem in context: + if elem.tag == "note": + result_string += _parse_note(elem)["content"] + return result_string + + +class EverNoteLoader(BaseLoader): + """Loader to load in EverNote files..""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + self.file_path = file_path + + def load(self) -> List[Document]: + """Load document from EverNote file.""" + text = _parse_note_xml(self.file_path) + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/facebook_chat.py b/langchain/document_loaders/facebook_chat.py new file mode 100644 index 0000000000000000000000000000000000000000..d2dec9f0c8ed0810fa7fd2325bece0d9cbb11dec --- /dev/null +++ b/langchain/document_loaders/facebook_chat.py @@ -0,0 +1,57 @@ +"""Loader that loads Facebook chat json dump.""" +import datetime +import json +from pathlib import Path +from typing import List + +from langchain.docstore.document 
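A short sketch of the EverNote loader above (the ``.enex`` export path is illustrative; parsing relies on the ``lxml`` and ``pypandoc`` packages imported by the helpers):

.. code-block:: python

    from langchain.document_loaders import EverNoteLoader

    loader = EverNoteLoader("my_export.enex")  # illustrative export file
    docs = loader.load()  # one Document with the concatenated note contents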
import Document +from langchain.document_loaders.base import BaseLoader + + +def concatenate_rows(row: dict) -> str: + """Combine message information in a readable format ready to be used.""" + sender = row["sender_name"] + text = row["content"] + date = datetime.datetime.fromtimestamp(row["timestamp_ms"] / 1000).strftime( + "%Y-%m-%d %H:%M:%S" + ) + return f"{sender} on {date}: {text}\n\n" + + +class FacebookChatLoader(BaseLoader): + """Loader that loads Facebook messages json directory dump.""" + + def __init__(self, path: str): + """Initialize with path.""" + self.file_path = path + + def load(self) -> List[Document]: + """Load documents.""" + try: + import pandas as pd + except ImportError: + raise ValueError( + "pandas is needed for Facebook chat loader, " + "please install with `pip install pandas`" + ) + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + + normalized_messages = pd.json_normalize(d["messages"]) + df_normalized_messages = pd.DataFrame(normalized_messages) + + # Only keep plain text messages + # (no services, nor links, hashtags, code, bold ...) + df_filtered = df_normalized_messages[ + (df_normalized_messages.content.apply(lambda x: type(x) == str)) + ] + + df_filtered = df_filtered[["timestamp_ms", "content", "sender_name"]] + + text = df_filtered.apply(concatenate_rows, axis=1).str.cat(sep="") + + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/figma.py b/langchain/document_loaders/figma.py new file mode 100644 index 0000000000000000000000000000000000000000..420ef0e94747d5c6f4d797fc6717ffc9f6370ad3 --- /dev/null +++ b/langchain/document_loaders/figma.py @@ -0,0 +1,59 @@ +"""Loader that loads Figma files json dump.""" +import json +import urllib.request +from typing import Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def _stringify_value(val: Any) -> str: + if isinstance(val, str): + return val + elif isinstance(val, dict): + return "\n" + _stringify_dict(val) + elif isinstance(val, list): + return "\n".join(_stringify_value(v) for v in val) + else: + return str(val) + + +def _stringify_dict(data: dict) -> str: + text = "" + for key, value in data.items(): + text += key + ": " + _stringify_value(data[key]) + "\n" + return text + + +class FigmaFileLoader(BaseLoader): + """Loader that loads Figma file json.""" + + def __init__(self, access_token: str, ids: str, key: str): + """Initialize with access token, ids, and key.""" + self.access_token = access_token + self.ids = ids + self.key = key + + def _construct_figma_api_url(self) -> str: + api_url = "https://api.figma.com/v1/files/%s/nodes?ids=%s" % ( + self.key, + self.ids, + ) + return api_url + + def _get_figma_file(self) -> Any: + """Get Figma file from Figma REST API.""" + headers = {"X-Figma-Token": self.access_token} + request = urllib.request.Request( + self._construct_figma_api_url(), headers=headers + ) + with urllib.request.urlopen(request) as response: + json_data = json.loads(response.read().decode()) + return json_data + + def load(self) -> List[Document]: + """Load file""" + data = self._get_figma_file() + text = _stringify_dict(data) + metadata = {"source": self._construct_figma_api_url()} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/gcs_directory.py b/langchain/document_loaders/gcs_directory.py new file mode 100644 index 
0000000000000000000000000000000000000000..52939eb3b0423eedd76585910a7516b6a54c354a --- /dev/null +++ b/langchain/document_loaders/gcs_directory.py @@ -0,0 +1,32 @@ +"""Loading logic for loading documents from an GCS directory.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.gcs_file import GCSFileLoader + + +class GCSDirectoryLoader(BaseLoader): + """Loading logic for loading documents from GCS.""" + + def __init__(self, project_name: str, bucket: str, prefix: str = ""): + """Initialize with bucket and key name.""" + self.project_name = project_name + self.bucket = bucket + self.prefix = prefix + + def load(self) -> List[Document]: + """Load documents.""" + try: + from google.cloud import storage + except ImportError: + raise ValueError( + "Could not import google-cloud-storage python package. " + "Please it install it with `pip install google-cloud-storage`." + ) + client = storage.Client(project=self.project_name) + docs = [] + for blob in client.list_blobs(self.bucket, prefix=self.prefix): + loader = GCSFileLoader(self.project_name, self.bucket, blob.name) + docs.extend(loader.load()) + return docs diff --git a/langchain/document_loaders/gcs_file.py b/langchain/document_loaders/gcs_file.py new file mode 100644 index 0000000000000000000000000000000000000000..d6fb172d9303c474e72066f8cab3fe73b3dd9c75 --- /dev/null +++ b/langchain/document_loaders/gcs_file.py @@ -0,0 +1,40 @@ +"""Loading logic for loading documents from a GCS file.""" +import tempfile +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class GCSFileLoader(BaseLoader): + """Loading logic for loading documents from GCS.""" + + def __init__(self, project_name: str, bucket: str, blob: str): + """Initialize with bucket and key name.""" + self.bucket = bucket + self.blob = blob + self.project_name = project_name + + def load(self) -> List[Document]: + """Load documents.""" + try: + from google.cloud import storage + except ImportError: + raise ValueError( + "Could not import google-cloud-storage python package. " + "Please it install it with `pip install google-cloud-storage`." + ) + + # Initialise a client + storage_client = storage.Client(self.project_name) + # Create a bucket object for our bucket + bucket = storage_client.get_bucket(self.bucket) + # Create a blob object from the filepath + blob = bucket.blob(self.blob) + with tempfile.TemporaryDirectory() as temp_dir: + file_path = f"{temp_dir}/{self.blob}" + # Download the file to a destination + blob.download_to_filename(file_path) + loader = UnstructuredFileLoader(file_path) + return loader.load() diff --git a/langchain/document_loaders/gitbook.py b/langchain/document_loaders/gitbook.py new file mode 100644 index 0000000000000000000000000000000000000000..1c40b3f6e10c11bad875e4f2acb649575d7c36d7 --- /dev/null +++ b/langchain/document_loaders/gitbook.py @@ -0,0 +1,70 @@ +"""Loader that loads GitBook.""" +from typing import Any, List, Optional + +from langchain.docstore.document import Document +from langchain.document_loaders.web_base import WebBaseLoader + + +class GitbookLoader(WebBaseLoader): + """Load GitBook data. + + 1. load from either a single page, or + 2. load all (relative) paths in the navbar. 
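For example, a minimal sketch (the documentation URLs are illustrative):

.. code-block:: python

    from langchain.document_loaders import GitbookLoader

    # Single page:
    loader = GitbookLoader("https://docs.example.com/intro")
    docs = loader.load()

    # Every page linked from the navbar, resolved against base_url:
    loader = GitbookLoader(
        "https://docs.example.com",
        load_all_paths=True,
        base_url="https://docs.example.com",
    )
    docs = loader.load()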
+ """ + + def __init__( + self, + web_page: str, + load_all_paths: bool = False, + base_url: Optional[str] = None, + ): + """Initialize with web page and whether to load all paths. + + Args: + web_page: The web page to load or the starting point from where + relative paths are discovered. + load_all_paths: If set to True, all relative paths in the navbar + are loaded instead of only `web_page`. + base_url: If `load_all_paths` is True, the relative paths are + appended to this base url. Defaults to `web_page` if not set. + """ + super().__init__(web_page) + self.base_url = base_url or web_page + if self.base_url.endswith("/"): + self.base_url = self.base_url[:-1] + self.load_all_paths = load_all_paths + + def load(self) -> List[Document]: + """Fetch text from one single GitBook page.""" + if self.load_all_paths: + soup_info = self.scrape() + relative_paths = self._get_paths(soup_info) + documents = [] + for path in relative_paths: + url = self.base_url + path + print(f"Fetching text from {url}") + soup_info = self._scrape(url) + documents.append(self._get_document(soup_info, url)) + return documents + else: + soup_info = self.scrape() + return [self._get_document(soup_info, self.web_path)] + + def _get_document(self, soup: Any, custom_url: Optional[str] = None) -> Document: + """Fetch content from page and return Document.""" + page_content_raw = soup.find("main") + content = page_content_raw.get_text(separator="\n").strip() + title_if_exists = page_content_raw.find("h1") + title = title_if_exists.text if title_if_exists else "" + metadata = { + "source": custom_url if custom_url else self.web_path, + "title": title, + } + return Document(page_content=content, metadata=metadata) + + def _get_paths(self, soup: Any) -> List[str]: + """Fetch all relative paths in the navbar.""" + nav = soup.find("nav") + links = nav.findAll("a") + # only return relative links + return [link.get("href") for link in links if link.get("href")[0] == "/"] diff --git a/langchain/document_loaders/googledrive.py b/langchain/document_loaders/googledrive.py new file mode 100644 index 0000000000000000000000000000000000000000..d6c8951efe6ae5f6a94e63263520c1750e445871 --- /dev/null +++ b/langchain/document_loaders/googledrive.py @@ -0,0 +1,203 @@ +"""Loader that loads data from Google Drive.""" + +# Prerequisites: +# 1. Create a Google Cloud project +# 2. Enable the Google Drive API: +# https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com +# 3. Authorize credentials for desktop app: +# https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application # noqa: E501 +# 4. 
For service accounts visit +# https://cloud.google.com/iam/docs/service-accounts-create + +from pathlib import Path +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, root_validator, validator + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + +SCOPES = ["https://www.googleapis.com/auth/drive.readonly"] + + +class GoogleDriveLoader(BaseLoader, BaseModel): + """Loader that loads Google Docs from Google Drive.""" + + service_account_key: Path = Path.home() / ".credentials" / "keys.json" + credentials_path: Path = Path.home() / ".credentials" / "credentials.json" + token_path: Path = Path.home() / ".credentials" / "token.json" + folder_id: Optional[str] = None + document_ids: Optional[List[str]] = None + file_ids: Optional[List[str]] = None + + @root_validator + def validate_folder_id_or_document_ids( + cls, values: Dict[str, Any] + ) -> Dict[str, Any]: + """Validate that either folder_id or document_ids is set, but not both.""" + if values.get("folder_id") and ( + values.get("document_ids") or values.get("file_ids") + ): + raise ValueError( + "Cannot specify both folder_id and document_ids nor " + "folder_id and file_ids" + ) + if ( + not values.get("folder_id") + and not values.get("document_ids") + and not values.get("file_ids") + ): + raise ValueError("Must specify either folder_id, document_ids, or file_ids") + return values + + @validator("credentials_path") + def validate_credentials_path(cls, v: Any, **kwargs: Any) -> Any: + """Validate that credentials_path exists.""" + if not v.exists(): + raise ValueError(f"credentials_path {v} does not exist") + return v + + def _load_credentials(self) -> Any: + """Load credentials.""" + # Adapted from https://developers.google.com/drive/api/v3/quickstart/python + try: + from google.auth.transport.requests import Request + from google.oauth2 import service_account + from google.oauth2.credentials import Credentials + from google_auth_oauthlib.flow import InstalledAppFlow + except ImportError: + raise ImportError( + "You must run" + "`pip install --upgrade " + "google-api-python-client google-auth-httplib2 " + "google-auth-oauthlib`" + "to use the Google Drive loader." 
+ ) + + creds = None + if self.service_account_key.exists(): + return service_account.Credentials.from_service_account_file( + str(self.service_account_key), scopes=SCOPES + ) + + if self.token_path.exists(): + creds = Credentials.from_authorized_user_file(str(self.token_path), SCOPES) + + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + str(self.credentials_path), SCOPES + ) + creds = flow.run_local_server(port=0) + with open(self.token_path, "w") as token: + token.write(creds.to_json()) + + return creds + + def _load_document_from_id(self, id: str) -> Document: + """Load a document from an ID.""" + from io import BytesIO + + from googleapiclient.discovery import build + from googleapiclient.http import MediaIoBaseDownload + + creds = self._load_credentials() + service = build("drive", "v3", credentials=creds) + + request = service.files().export_media(fileId=id, mimeType="text/plain") + fh = BytesIO() + downloader = MediaIoBaseDownload(fh, request) + done = False + while done is False: + status, done = downloader.next_chunk() + text = fh.getvalue().decode("utf-8") + metadata = {"source": f"https://docs.google.com/document/d/{id}/edit"} + return Document(page_content=text, metadata=metadata) + + def _load_documents_from_folder(self) -> List[Document]: + """Load documents from a folder.""" + from googleapiclient.discovery import build + + creds = self._load_credentials() + service = build("drive", "v3", credentials=creds) + + results = ( + service.files() + .list( + q=f"'{self.folder_id}' in parents", + pageSize=1000, + fields="nextPageToken, files(id, name, mimeType)", + ) + .execute() + ) + items = results.get("files", []) + returns = [] + for item in items: + if item["mimeType"] == "application/vnd.google-apps.document": + returns.append(self._load_document_from_id(item["id"])) + elif item["mimeType"] == "application/pdf": + returns.extend(self._load_file_from_id(item["id"])) + else: + pass + + return returns + + def _load_documents_from_ids(self) -> List[Document]: + """Load documents from a list of IDs.""" + if not self.document_ids: + raise ValueError("document_ids must be set") + + return [self._load_document_from_id(doc_id) for doc_id in self.document_ids] + + def _load_file_from_id(self, id: str) -> List[Document]: + """Load a file from an ID.""" + from io import BytesIO + + from googleapiclient.discovery import build + from googleapiclient.http import MediaIoBaseDownload + + creds = self._load_credentials() + service = build("drive", "v3", credentials=creds) + + request = service.files().get_media(fileId=id) + fh = BytesIO() + downloader = MediaIoBaseDownload(fh, request) + done = False + while done is False: + status, done = downloader.next_chunk() + content = fh.getvalue() + + from PyPDF2 import PdfReader + + pdf_reader = PdfReader(BytesIO(content)) + + return [ + Document( + page_content=page.extract_text(), + metadata={ + "source": f"https://drive.google.com/file/d/{id}/view", + "page": i, + }, + ) + for i, page in enumerate(pdf_reader.pages) + ] + + def _load_file_from_ids(self) -> List[Document]: + """Load files from a list of IDs.""" + if not self.file_ids: + raise ValueError("file_ids must be set") + docs = [] + for file_id in self.file_ids: + docs.extend(self._load_file_from_id(file_id)) + return docs + + def load(self) -> List[Document]: + """Load documents.""" + if self.folder_id: + return self._load_documents_from_folder() + elif 
self.document_ids: + return self._load_documents_from_ids() + else: + return self._load_file_from_ids() diff --git a/langchain/document_loaders/gutenberg.py b/langchain/document_loaders/gutenberg.py new file mode 100644 index 0000000000000000000000000000000000000000..41a0a5f55a9d73a604e44bb3fee9c22e1e593ce8 --- /dev/null +++ b/langchain/document_loaders/gutenberg.py @@ -0,0 +1,28 @@ +"""Loader that loads .txt web files.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class GutenbergLoader(BaseLoader): + """Loader that uses urllib to load .txt web files.""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + if not file_path.startswith("https://www.gutenberg.org"): + raise ValueError("file path must start with 'https://www.gutenberg.org'") + + if not file_path.endswith(".txt"): + raise ValueError("file path must end with '.txt'") + + self.file_path = file_path + + def load(self) -> List[Document]: + """Load file.""" + from urllib.request import urlopen + + elements = urlopen(self.file_path) + text = "\n\n".join([str(el.decode("utf-8-sig")) for el in elements]) + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/hn.py b/langchain/document_loaders/hn.py new file mode 100644 index 0000000000000000000000000000000000000000..91ff8d9d5e0ca12bcba41e1d2577a8309393b132 --- /dev/null +++ b/langchain/document_loaders/hn.py @@ -0,0 +1,60 @@ +"""Loader that loads HN.""" +from typing import Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.web_base import WebBaseLoader + + +class HNLoader(WebBaseLoader): + """Load Hacker News data from either main page results or the comments page.""" + + def load(self) -> List[Document]: + """Get important HN webpage information. 
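Looking back at the Google Drive loader above, a minimal sketch (the folder ID is illustrative, and the default credential files under ``~/.credentials`` are assumed to exist):

.. code-block:: python

    from langchain.document_loaders import GoogleDriveLoader

    # Exactly one of folder_id, document_ids or file_ids may be set; passing
    # folder_id together with either of the others raises a ValueError.
    loader = GoogleDriveLoader(folder_id="your-folder-id")  # illustrative ID
    docs = loader.load()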
+ + Components are: + - title + - content + - source url, + - time of post + - author of the post + - number of comments + - rank of the post + """ + soup_info = self.scrape() + if "item" in self.web_path: + return self.load_comments(soup_info) + else: + return self.load_results(soup_info) + + def load_comments(self, soup_info: Any) -> List[Document]: + """Load comments from a HN post.""" + comments = soup_info.select("tr[class='athing comtr']") + title = soup_info.select_one("tr[id='pagespace']").get("title") + return [ + Document( + page_content=comment.text.strip(), + metadata={"source": self.web_path, "title": title}, + ) + for comment in comments + ] + + def load_results(self, soup: Any) -> List[Document]: + """Load items from an HN page.""" + items = soup.select("tr[class='athing']") + documents = [] + for lineItem in items: + ranking = lineItem.select_one("span[class='rank']").text + link = lineItem.find("span", {"class": "titleline"}).find("a").get("href") + title = lineItem.find("span", {"class": "titleline"}).text.strip() + metadata = { + "source": self.web_path, + "title": title, + "link": link, + "ranking": ranking, + } + documents.append( + Document( + page_content=title, link=link, ranking=ranking, metadata=metadata + ) + ) + return documents diff --git a/langchain/document_loaders/html.py b/langchain/document_loaders/html.py new file mode 100644 index 0000000000000000000000000000000000000000..517842159e975aa6ad2ff47570666fb1b28bee4b --- /dev/null +++ b/langchain/document_loaders/html.py @@ -0,0 +1,13 @@ +"""Loader that uses unstructured to load HTML files.""" +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredHTMLLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load HTML files.""" + + def _get_elements(self) -> List: + from unstructured.partition.html import partition_html + + return partition_html(filename=self.file_path, **self.unstructured_kwargs) diff --git a/langchain/document_loaders/html_bs.py b/langchain/document_loaders/html_bs.py new file mode 100644 index 0000000000000000000000000000000000000000..92802ccb5dd2593934f18ac5873521d8674a60ab --- /dev/null +++ b/langchain/document_loaders/html_bs.py @@ -0,0 +1,36 @@ +"""Loader that uses bs4 to load HTML files, enriching metadata with page title.""" + +import logging +from typing import Dict, List, Union + +from bs4 import BeautifulSoup + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + +logger = logging.getLogger(__file__) + + +class BSHTMLLoader(BaseLoader): + """Loader that uses beautiful soup to parse HTML files.""" + + def __init__(self, file_path: str) -> None: + self.file_path = file_path + + def load(self) -> List[Document]: + """Load HTML document into document objects.""" + with open(self.file_path, "r") as f: + soup = BeautifulSoup(f, features="lxml") + + text = soup.get_text() + + if soup.title: + title = str(soup.title.string) + else: + title = "" + + metadata: Dict[str, Union[str, None]] = { + "source": self.file_path, + "title": title, + } + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/ifixit.py b/langchain/document_loaders/ifixit.py new file mode 100644 index 0000000000000000000000000000000000000000..61169ade6009b4a55bf312111442aaa0874693a3 --- /dev/null +++ b/langchain/document_loaders/ifixit.py @@ -0,0 +1,205 @@ +"""Loader that loads iFixit data.""" +from typing import List, Optional + +import 
requests + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.web_base import WebBaseLoader + +IFIXIT_BASE_URL = "https://www.ifixit.com/api/2.0" + + +class IFixitLoader(BaseLoader): + """Load iFixit repair guides, device wikis and answers. + + iFixit is the largest, open repair community on the web. The site contains nearly + 100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is + licensed under CC-BY. + + This loader will allow you to download the text of a repair guide, text of Q&A's + and wikis from devices on iFixit using their open APIs and web scraping. + """ + + def __init__(self, web_path: str): + """Initialize with web path.""" + if not web_path.startswith("https://www.ifixit.com"): + raise ValueError("web path must start with 'https://www.ifixit.com'") + + path = web_path.replace("https://www.ifixit.com", "") + + allowed_paths = ["/Device", "/Guide", "/Answers", "/Teardown"] + + """ TODO: Add /Wiki """ + if not any(path.startswith(allowed_path) for allowed_path in allowed_paths): + raise ValueError( + "web path must start with /Device, /Guide, /Teardown or /Answers" + ) + + pieces = [x for x in path.split("/") if x] + + """Teardowns are just guides by a different name""" + self.page_type = pieces[0] if pieces[0] != "Teardown" else "Guide" + + if self.page_type == "Guide" or self.page_type == "Answers": + self.id = pieces[2] + else: + self.id = pieces[1] + + self.web_path = web_path + + def load(self) -> List[Document]: + if self.page_type == "Device": + return self.load_device() + elif self.page_type == "Guide" or self.page_type == "Teardown": + return self.load_guide() + elif self.page_type == "Answers": + return self.load_questions_and_answers() + else: + raise ValueError("Unknown page type: " + self.page_type) + + @staticmethod + def load_suggestions(query: str = "", doc_type: str = "all") -> List[Document]: + res = requests.get( + IFIXIT_BASE_URL + "/suggest/" + query + "?doctypes=" + doc_type + ) + + if res.status_code != 200: + raise ValueError( + 'Could not load suggestions for "' + query + '"\n' + res.json() + ) + + data = res.json() + + results = data["results"] + output = [] + + for result in results: + try: + loader = IFixitLoader(result["url"]) + if loader.page_type == "Device": + output += loader.load_device(include_guides=False) + else: + output += loader.load() + except ValueError: + continue + + return output + + def load_questions_and_answers( + self, url_override: Optional[str] = None + ) -> List[Document]: + loader = WebBaseLoader(self.web_path if url_override is None else url_override) + soup = loader.scrape() + + output = [] + + title = soup.find("h1", "post-title").text + + output.append("# " + title) + output.append(soup.select_one(".post-content .post-text").text.strip()) + + answersHeader = soup.find("div", "post-answers-header") + if answersHeader: + output.append("\n## " + answersHeader.text.strip()) + + for answer in soup.select(".js-answers-list .post.post-answer"): + if answer.has_attr("itemprop") and "acceptedAnswer" in answer["itemprop"]: + output.append("\n### Accepted Answer") + elif "post-helpful" in answer["class"]: + output.append("\n### Most Helpful Answer") + else: + output.append("\n### Other Answer") + + output += [ + a.text.strip() for a in answer.select(".post-content .post-text") + ] + output.append("\n") + + text = "\n".join(output).strip() + + metadata = {"source": self.web_path, "title": title} + + return 
[Document(page_content=text, metadata=metadata)] + + def load_device( + self, url_override: Optional[str] = None, include_guides: bool = True + ) -> List[Document]: + documents = [] + if url_override is None: + url = IFIXIT_BASE_URL + "/wikis/CATEGORY/" + self.id + else: + url = url_override + + res = requests.get(url) + data = res.json() + text = "\n".join( + [ + data[key] + for key in ["title", "description", "contents_raw"] + if key in data + ] + ).strip() + + metadata = {"source": self.web_path, "title": data["title"]} + documents.append(Document(page_content=text, metadata=metadata)) + + if include_guides: + """Load and return documents for each guide linked to from the device""" + guide_urls = [guide["url"] for guide in data["guides"]] + for guide_url in guide_urls: + documents.append(IFixitLoader(guide_url).load()[0]) + + return documents + + def load_guide(self, url_override: Optional[str] = None) -> List[Document]: + if url_override is None: + url = IFIXIT_BASE_URL + "/guides/" + self.id + else: + url = url_override + + res = requests.get(url) + + if res.status_code != 200: + raise ValueError( + "Could not load guide: " + self.web_path + "\n" + res.json() + ) + + data = res.json() + + doc_parts = ["# " + data["title"], data["introduction_raw"]] + + doc_parts.append("\n\n###Tools Required:") + if len(data["tools"]) == 0: + doc_parts.append("\n - None") + else: + for tool in data["tools"]: + doc_parts.append("\n - " + tool["text"]) + + doc_parts.append("\n\n###Parts Required:") + if len(data["parts"]) == 0: + doc_parts.append("\n - None") + else: + for part in data["parts"]: + doc_parts.append("\n - " + part["text"]) + + for row in data["steps"]: + doc_parts.append( + "\n\n## " + + ( + row["title"] + if row["title"] != "" + else "Step {}".format(row["orderby"]) + ) + ) + + for line in row["lines"]: + doc_parts.append(line["text_raw"]) + + doc_parts.append(data["conclusion_raw"]) + + text = "\n".join(doc_parts) + + metadata = {"source": self.web_path, "title": data["title"]} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/image.py b/langchain/document_loaders/image.py new file mode 100644 index 0000000000000000000000000000000000000000..9732495d42272d7e5e15f0f402b3b8ad88e498fe --- /dev/null +++ b/langchain/document_loaders/image.py @@ -0,0 +1,13 @@ +"""Loader that loads image files.""" +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredImageLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load image files, such as PNGs and JPGs.""" + + def _get_elements(self) -> List: + from unstructured.partition.image import partition_image + + return partition_image(filename=self.file_path, **self.unstructured_kwargs) diff --git a/langchain/document_loaders/imsdb.py b/langchain/document_loaders/imsdb.py new file mode 100644 index 0000000000000000000000000000000000000000..4589553d333edb4c1982fe98e103fd96db0b6976 --- /dev/null +++ b/langchain/document_loaders/imsdb.py @@ -0,0 +1,16 @@ +"""Loader that loads IMSDb.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.web_base import WebBaseLoader + + +class IMSDbLoader(WebBaseLoader): + """Loader that loads IMSDb webpages.""" + + def load(self) -> List[Document]: + """Load webpage.""" + soup = self.scrape() + text = soup.select_one("td[class='scrtext']").text + metadata = {"source": self.web_path} + return [Document(page_content=text, 
metadata=metadata)] diff --git a/langchain/document_loaders/markdown.py b/langchain/document_loaders/markdown.py new file mode 100644 index 0000000000000000000000000000000000000000..c049e8ff6d61b568d7c56266614e4d52cd797a38 --- /dev/null +++ b/langchain/document_loaders/markdown.py @@ -0,0 +1,25 @@ +"""Loader that loads Markdown files.""" +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredMarkdownLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load markdown files.""" + + def _get_elements(self) -> List: + from unstructured.__version__ import __version__ as __unstructured_version__ + from unstructured.partition.md import partition_md + + # NOTE(MthwRobinson) - enables the loader to work when you're using pre-release + # versions of unstructured like 0.4.17-dev1 + _unstructured_version = __unstructured_version__.split("-")[0] + unstructured_version = tuple([int(x) for x in _unstructured_version.split(".")]) + + if unstructured_version < (0, 4, 16): + raise ValueError( + f"You are on unstructured version {__unstructured_version__}. " + "Partitioning markdown files is only supported in unstructured>=0.4.16." + ) + + return partition_md(filename=self.file_path) diff --git a/langchain/document_loaders/notebook.py b/langchain/document_loaders/notebook.py new file mode 100644 index 0000000000000000000000000000000000000000..aaa5b057eaf65274bda26b9149bbf019740981c7 --- /dev/null +++ b/langchain/document_loaders/notebook.py @@ -0,0 +1,109 @@ +"""Loader that loads .ipynb notebook files.""" +import json +from pathlib import Path +from typing import Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def concatenate_cells( + cell: dict, include_outputs: bool, max_output_length: int, traceback: bool +) -> str: + """Combine cells information in a readable format ready to be used.""" + cell_type = cell["cell_type"] + source = cell["source"] + output = cell["outputs"] + + if include_outputs and cell_type == "code" and output: + if "ename" in output[0].keys(): + error_name = output[0]["ename"] + error_value = output[0]["evalue"] + if traceback: + traceback = output[0]["traceback"] + return ( + f"'{cell_type}' cell: '{source}'\n, gives error '{error_name}'," + f" with description '{error_value}'\n" + f"and traceback '{traceback}'\n\n" + ) + else: + return ( + f"'{cell_type}' cell: '{source}'\n, gives error '{error_name}'," + f"with description '{error_value}'\n\n" + ) + elif output[0]["output_type"] == "stream": + output = output[0]["text"] + min_output = min(max_output_length, len(output)) + return ( + f"'{cell_type}' cell: '{source}'\n with " + f"output: '{output[:min_output]}'\n\n" + ) + else: + return f"'{cell_type}' cell: '{source}'\n\n" + + return "" + + +def remove_newlines(x: Any) -> Any: + """Remove recursively newlines, no matter the data structure they are stored in.""" + import pandas as pd + + if isinstance(x, str): + return x.replace("\n", "") + elif isinstance(x, list): + return [remove_newlines(elem) for elem in x] + elif isinstance(x, pd.DataFrame): + return x.applymap(remove_newlines) + else: + return x + + +class NotebookLoader(BaseLoader): + """Loader that loads .ipynb notebook files.""" + + def __init__( + self, + path: str, + include_outputs: bool = False, + max_output_length: int = 10, + remove_newline: bool = False, + traceback: bool = False, + ): + """Initialize with path.""" + self.file_path = path + 
self.include_outputs = include_outputs + self.max_output_length = max_output_length + self.remove_newline = remove_newline + self.traceback = traceback + + def load( + self, + ) -> List[Document]: + """Load documents.""" + try: + import pandas as pd + except ImportError: + raise ValueError( + "pandas is needed for Notebook Loader, " + "please install with `pip install pandas`" + ) + p = Path(self.file_path) + + with open(p, encoding="utf8") as f: + d = json.load(f) + + data = pd.json_normalize(d["cells"]) + filtered_data = data[["cell_type", "source", "outputs"]] + if self.remove_newline: + filtered_data = filtered_data.applymap(remove_newlines) + + text = filtered_data.apply( + lambda x: concatenate_cells( + x, self.include_outputs, self.max_output_length, self.traceback + ), + axis=1, + ).str.cat(sep=" ") + + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/notion.py b/langchain/document_loaders/notion.py new file mode 100644 index 0000000000000000000000000000000000000000..f5d83bf9eedbdcad1680d94c80afeb175e5b365a --- /dev/null +++ b/langchain/document_loaders/notion.py @@ -0,0 +1,25 @@ +"""Loader that loads Notion directory dump.""" +from pathlib import Path +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class NotionDirectoryLoader(BaseLoader): + """Loader that loads Notion directory dump.""" + + def __init__(self, path: str): + """Initialize with path.""" + self.file_path = path + + def load(self) -> List[Document]: + """Load documents.""" + ps = list(Path(self.file_path).glob("**/*.md")) + docs = [] + for p in ps: + with open(p) as f: + text = f.read() + metadata = {"source": str(p)} + docs.append(Document(page_content=text, metadata=metadata)) + return docs diff --git a/langchain/document_loaders/obsidian.py b/langchain/document_loaders/obsidian.py new file mode 100644 index 0000000000000000000000000000000000000000..df5a5d7e44b2584f03a8f3851dee8547175ba732 --- /dev/null +++ b/langchain/document_loaders/obsidian.py @@ -0,0 +1,26 @@ +"""Loader that loads Obsidian directory dump.""" +from pathlib import Path +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class ObsidianLoader(BaseLoader): + """Loader that loads Obsidian files from disk.""" + + def __init__(self, path: str, encoding: str = "UTF-8"): + """Initialize with path.""" + self.file_path = path + self.encoding = encoding + + def load(self) -> List[Document]: + """Load documents.""" + ps = list(Path(self.file_path).glob("**/*.md")) + docs = [] + for p in ps: + with open(p, encoding=self.encoding) as f: + text = f.read() + metadata = {"source": str(p)} + docs.append(Document(page_content=text, metadata=metadata)) + return docs diff --git a/langchain/document_loaders/pdf.py b/langchain/document_loaders/pdf.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e9cd1cd986f87ab24e8fc149b9cd924b82fb53 --- /dev/null +++ b/langchain/document_loaders/pdf.py @@ -0,0 +1,171 @@ +"""Loader that loads PDF files.""" +import os +import tempfile +from abc import ABC +from typing import Any, List, Optional +from urllib.parse import urlparse + +import requests + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class 
UnstructuredPDFLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load PDF files.""" + + def _get_elements(self) -> List: + from unstructured.partition.pdf import partition_pdf + + return partition_pdf(filename=self.file_path, **self.unstructured_kwargs) + + +class BasePDFLoader(BaseLoader, ABC): + """Base loader class for PDF files. + + Defaults to check for local file, but if the file is a web path, it will download it + to a temporary file, and use that, then clean up the temporary file after completion + """ + + file_path: str + web_path: Optional[str] = None + + def __init__(self, file_path: str): + """Initialize with file path.""" + self.file_path = file_path + if "~" in self.file_path: + self.file_path = os.path.expanduser(self.file_path) + + # If the file is a web path, download it to a temporary file, and use that + if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path): + r = requests.get(self.file_path) + + if r.status_code != 200: + raise ValueError( + "Check the url of your file; returned status code %s" + % r.status_code + ) + + self.web_path = self.file_path + self.temp_file = tempfile.NamedTemporaryFile() + self.temp_file.write(r.content) + self.file_path = self.temp_file.name + elif not os.path.isfile(self.file_path): + raise ValueError("File path %s is not a valid file or url" % self.file_path) + + def __del__(self) -> None: + if hasattr(self, "temp_file"): + self.temp_file.close() + + @staticmethod + def _is_valid_url(url: str) -> bool: + """Check if the url is valid.""" + parsed = urlparse(url) + return bool(parsed.netloc) and bool(parsed.scheme) + + +class OnlinePDFLoader(BasePDFLoader): + """Loader that loads online PDFs.""" + + def load(self) -> List[Document]: + """Load documents.""" + loader = UnstructuredPDFLoader(str(self.file_path)) + return loader.load() + + +class PyPDFLoader(BasePDFLoader): + """Loads a PDF with pypdf and chunks at character level. + + Loader also stores page numbers in metadatas. 
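+
+    Example (illustrative usage; substitute the path of a local PDF):
+        .. code-block:: python
+
+            from langchain.document_loaders.pdf import PyPDFLoader
+
+            loader = PyPDFLoader("path/to/file.pdf")
+            pages = loader.load()
+            # each page is a Document with metadata["page"] set to its index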
+ """ + + def __init__(self, file_path: str): + """Initialize with file path.""" + try: + import pypdf # noqa:F401 + except ImportError: + raise ValueError( + "pypdf package not found, please install it with " "`pip install pypdf`" + ) + super().__init__(file_path) + + def load(self) -> List[Document]: + """Load given path as pages.""" + import pypdf + + with open(self.file_path, "rb") as pdf_file_obj: + pdf_reader = pypdf.PdfReader(pdf_file_obj) + return [ + Document( + page_content=page.extract_text(), + metadata={"source": self.file_path, "page": i}, + ) + for i, page in enumerate(pdf_reader.pages) + ] + + +class PDFMinerLoader(BasePDFLoader): + """Loader that uses PDFMiner to load PDF files.""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + try: + from pdfminer.high_level import extract_text # noqa:F401 + except ImportError: + raise ValueError( + "pdfminer package not found, please install it with " + "`pip install pdfminer.six`" + ) + + super().__init__(file_path) + + def load(self) -> List[Document]: + """Load file.""" + from pdfminer.high_level import extract_text + + text = extract_text(self.file_path) + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] + + +class PyMuPDFLoader(BasePDFLoader): + """Loader that uses PyMuPDF to load PDF files.""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + try: + import fitz # noqa:F401 + except ImportError: + raise ValueError( + "PyMuPDF package not found, please install it with " + "`pip install pymupdf`" + ) + + super().__init__(file_path) + + def load(self, **kwargs: Optional[Any]) -> List[Document]: + """Load file.""" + import fitz + + doc = fitz.open(self.file_path) # open document + file_path = self.file_path if self.web_path is None else self.web_path + + return [ + Document( + page_content=page.get_text(**kwargs).encode("utf-8"), + metadata=dict( + { + "file_path": file_path, + "page_number": page.number + 1, + "total_pages": len(doc), + }, + **{ + k: doc.metadata[k] + for k in doc.metadata + if type(doc.metadata[k]) in [str, int] + } + ), + ) + for page in doc + ] diff --git a/langchain/document_loaders/powerpoint.py b/langchain/document_loaders/powerpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..9c49be2afabd8ba7d38d0f6da30ef279ae886c9d --- /dev/null +++ b/langchain/document_loaders/powerpoint.py @@ -0,0 +1,43 @@ +"""Loader that loads powerpoint files.""" +import os +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredPowerPointLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load powerpoint files.""" + + def _get_elements(self) -> List: + from unstructured.__version__ import __version__ as __unstructured_version__ + from unstructured.file_utils.filetype import FileType, detect_filetype + + unstructured_version = tuple( + [int(x) for x in __unstructured_version__.split(".")] + ) + # NOTE(MthwRobinson) - magic will raise an import error if the libmagic + # system dependency isn't installed. If it's not installed, we'll just + # check the file extension + try: + import magic # noqa: F401 + + is_ppt = detect_filetype(self.file_path) == FileType.PPT + except ImportError: + _, extension = os.path.splitext(self.file_path) + is_ppt = extension == ".ppt" + + if is_ppt and unstructured_version < (0, 4, 11): + raise ValueError( + f"You are on unstructured version {__unstructured_version__}. 
" + "Partitioning .ppt files is only supported in unstructured>=0.4.11. " + "Please upgrade the unstructured package and try again." + ) + + if is_ppt: + from unstructured.partition.ppt import partition_ppt + + return partition_ppt(filename=self.file_path, **self.unstructured_kwargs) + else: + from unstructured.partition.pptx import partition_pptx + + return partition_pptx(filename=self.file_path, **self.unstructured_kwargs) diff --git a/langchain/document_loaders/readthedocs.py b/langchain/document_loaders/readthedocs.py new file mode 100644 index 0000000000000000000000000000000000000000..a8ca2433fbf469f79654be41ef1171160bcd83a2 --- /dev/null +++ b/langchain/document_loaders/readthedocs.py @@ -0,0 +1,62 @@ +"""Loader that loads ReadTheDocs documentation directory dump.""" +from pathlib import Path +from typing import Any, List, Optional + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class ReadTheDocsLoader(BaseLoader): + """Loader that loads ReadTheDocs documentation directory dump.""" + + def __init__( + self, + path: str, + encoding: Optional[str] = None, + errors: Optional[str] = None, + **kwargs: Optional[Any] + ): + """Initialize path.""" + try: + from bs4 import BeautifulSoup + + except ImportError: + raise ValueError( + "Could not import python packages. " + "Please install it with `pip install beautifulsoup4`. " + ) + + try: + _ = BeautifulSoup( + "Parser builder library test.", **kwargs + ) + except Exception as e: + raise ValueError("Parsing kwargs do not appear valid") from e + + self.file_path = path + self.encoding = encoding + self.errors = errors + self.bs_kwargs = kwargs + + def load(self) -> List[Document]: + """Load documents.""" + from bs4 import BeautifulSoup + + def _clean_data(data: str) -> str: + soup = BeautifulSoup(data, **self.bs_kwargs) + text = soup.find_all("main", {"id": "main-content"}) + if len(text) != 0: + text = text[0].get_text() + else: + text = "" + return "\n".join([t for t in text.split("\n") if t]) + + docs = [] + for p in Path(self.file_path).rglob("*"): + if p.is_dir(): + continue + with open(p, encoding=self.encoding, errors=self.errors) as f: + text = _clean_data(f.read()) + metadata = {"source": str(p)} + docs.append(Document(page_content=text, metadata=metadata)) + return docs diff --git a/langchain/document_loaders/roam.py b/langchain/document_loaders/roam.py new file mode 100644 index 0000000000000000000000000000000000000000..ff06885764cb761caaa9a7618a91d0f573421410 --- /dev/null +++ b/langchain/document_loaders/roam.py @@ -0,0 +1,25 @@ +"""Loader that loads Roam directory dump.""" +from pathlib import Path +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class RoamLoader(BaseLoader): + """Loader that loads Roam files from disk.""" + + def __init__(self, path: str): + """Initialize with path.""" + self.file_path = path + + def load(self) -> List[Document]: + """Load documents.""" + ps = list(Path(self.file_path).glob("**/*.md")) + docs = [] + for p in ps: + with open(p) as f: + text = f.read() + metadata = {"source": str(p)} + docs.append(Document(page_content=text, metadata=metadata)) + return docs diff --git a/langchain/document_loaders/s3_directory.py b/langchain/document_loaders/s3_directory.py new file mode 100644 index 0000000000000000000000000000000000000000..98fa44088181a24559a0b786ccc3a786cab6af61 --- /dev/null +++ b/langchain/document_loaders/s3_directory.py @@ -0,0 +1,32 @@ 
+"""Loading logic for loading documents from an s3 directory.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.s3_file import S3FileLoader + + +class S3DirectoryLoader(BaseLoader): + """Loading logic for loading documents from s3.""" + + def __init__(self, bucket: str, prefix: str = ""): + """Initialize with bucket and key name.""" + self.bucket = bucket + self.prefix = prefix + + def load(self) -> List[Document]: + """Load documents.""" + try: + import boto3 + except ImportError: + raise ValueError( + "Could not import boto3 python package. " + "Please it install it with `pip install boto3`." + ) + s3 = boto3.resource("s3") + bucket = s3.Bucket(self.bucket) + docs = [] + for obj in bucket.objects.filter(Prefix=self.prefix): + loader = S3FileLoader(self.bucket, obj.key) + docs.extend(loader.load()) + return docs diff --git a/langchain/document_loaders/s3_file.py b/langchain/document_loaders/s3_file.py new file mode 100644 index 0000000000000000000000000000000000000000..f78913b21c20f5acd7f747325437a458cfdec649 --- /dev/null +++ b/langchain/document_loaders/s3_file.py @@ -0,0 +1,34 @@ +"""Loading logic for loading documents from an s3 file.""" +import os +import tempfile +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class S3FileLoader(BaseLoader): + """Loading logic for loading documents from s3.""" + + def __init__(self, bucket: str, key: str): + """Initialize with bucket and key name.""" + self.bucket = bucket + self.key = key + + def load(self) -> List[Document]: + """Load documents.""" + try: + import boto3 + except ImportError: + raise ValueError( + "Could not import boto3 python package. " + "Please it install it with `pip install boto3`." 
+            )
+        s3 = boto3.client("s3")
+        with tempfile.TemporaryDirectory() as temp_dir:
+            file_path = f"{temp_dir}/{self.key}"
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+            s3.download_file(self.bucket, self.key, file_path)
+            loader = UnstructuredFileLoader(file_path)
+            return loader.load()
diff --git a/langchain/document_loaders/srt.py b/langchain/document_loaders/srt.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce38f1c2f89a1fffe189072fbbc340f4bb2d6a7d
--- /dev/null
+++ b/langchain/document_loaders/srt.py
@@ -0,0 +1,28 @@
+"""Loader for .srt (subtitle) files."""
+from typing import List
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+
+
+class SRTLoader(BaseLoader):
+    """Loader for .srt (subtitle) files."""
+
+    def __init__(self, file_path: str):
+        """Initialize with file path."""
+        try:
+            import pysrt  # noqa:F401
+        except ImportError:
+            raise ValueError(
+                "package `pysrt` not found, please install it with `pip install pysrt`"
+            )
+        self.file_path = file_path
+
+    def load(self) -> List[Document]:
+        """Load using pysrt file."""
+        import pysrt
+
+        parsed_info = pysrt.open(self.file_path)
+        text = " ".join([t.text for t in parsed_info])
+        metadata = {"source": self.file_path}
+        return [Document(page_content=text, metadata=metadata)]
diff --git a/langchain/document_loaders/telegram.py b/langchain/document_loaders/telegram.py
new file mode 100644
index 0000000000000000000000000000000000000000..07f571d77c2961634a404d4a6de612e76c79198c
--- /dev/null
+++ b/langchain/document_loaders/telegram.py
@@ -0,0 +1,54 @@
+"""Loader that loads Telegram chat json dump."""
+import json
+from pathlib import Path
+from typing import List
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+
+
+def concatenate_rows(row: dict) -> str:
+    """Combine message information in a readable format ready to be used."""
+    date = row["date"]
+    sender = row["from"]
+    text = row["text"]
+    return f"{sender} on {date}: {text}\n\n"
+
+
+class TelegramChatLoader(BaseLoader):
+    """Loader that loads Telegram chat json dump."""
+
+    def __init__(self, path: str):
+        """Initialize with path."""
+        self.file_path = path
+
+    def load(self) -> List[Document]:
+        """Load documents."""
+        try:
+            import pandas as pd
+        except ImportError:
+            raise ValueError(
+                "pandas is needed for Telegram loader, "
+                "please install with `pip install pandas`"
+            )
+        p = Path(self.file_path)
+
+        with open(p, encoding="utf8") as f:
+            d = json.load(f)
+
+        normalized_messages = pd.json_normalize(d["messages"])
+        df_normalized_messages = pd.DataFrame(normalized_messages)
+
+        # Only keep plain text messages (no services, links, hashtags, code, bold...)
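+        # (service events have type "service", and formatted text such as links or
+        # code is exported as a list rather than a plain string, so both are dropped)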
+ df_filtered = df_normalized_messages[ + (df_normalized_messages.type == "message") + & (df_normalized_messages.text.apply(lambda x: type(x) == str)) + ] + + df_filtered = df_filtered[["date", "text", "from"]] + + text = df_filtered.apply(concatenate_rows, axis=1).str.cat(sep="") + + metadata = {"source": str(p)} + + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/text.py b/langchain/document_loaders/text.py new file mode 100644 index 0000000000000000000000000000000000000000..6962833a2e8d6f2a3c21d5b52ad5cf1ba9cb1fb5 --- /dev/null +++ b/langchain/document_loaders/text.py @@ -0,0 +1,20 @@ +"""Load text files.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class TextLoader(BaseLoader): + """Load text files.""" + + def __init__(self, file_path: str): + """Initialize with file path.""" + self.file_path = file_path + + def load(self) -> List[Document]: + """Load from file path.""" + with open(self.file_path) as f: + text = f.read() + metadata = {"source": self.file_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/unstructured.py b/langchain/document_loaders/unstructured.py new file mode 100644 index 0000000000000000000000000000000000000000..65c455e5c06675b28e2098625830b40a515ff07c --- /dev/null +++ b/langchain/document_loaders/unstructured.py @@ -0,0 +1,115 @@ +"""Loader that uses unstructured to load files.""" +from abc import ABC, abstractmethod +from typing import IO, Any, List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +def satisfies_min_unstructured_version(min_version: str) -> bool: + """Checks to see if the installed unstructured version exceeds the minimum version + for the feature in question.""" + from unstructured.__version__ import __version__ as __unstructured_version__ + + min_version_tuple = tuple([int(x) for x in min_version.split(".")]) + + # NOTE(MthwRobinson) - enables the loader to work when you're using pre-release + # versions of unstructured like 0.4.17-dev1 + _unstructured_version = __unstructured_version__.split("-")[0] + unstructured_version_tuple = tuple( + [int(x) for x in _unstructured_version.split(".")] + ) + + return unstructured_version_tuple >= min_version_tuple + + +class UnstructuredBaseLoader(BaseLoader, ABC): + """Loader that uses unstructured to load files.""" + + def __init__(self, mode: str = "single", **unstructured_kwargs: Any): + """Initialize with file path.""" + try: + import unstructured # noqa:F401 + except ImportError: + raise ValueError( + "unstructured package not found, please install it with " + "`pip install unstructured`" + ) + _valid_modes = {"single", "elements"} + if mode not in _valid_modes: + raise ValueError( + f"Got {mode} for `mode`, but should be one of `{_valid_modes}`" + ) + self.mode = mode + + if not satisfies_min_unstructured_version("0.5.4"): + if "strategy" in unstructured_kwargs: + unstructured_kwargs.pop("strategy") + + self.unstructured_kwargs = unstructured_kwargs + + @abstractmethod + def _get_elements(self) -> List: + """Get elements.""" + + @abstractmethod + def _get_metadata(self) -> dict: + """Get metadata.""" + + def load(self) -> List[Document]: + """Load file.""" + elements = self._get_elements() + if self.mode == "elements": + docs: List[Document] = list() + for element in elements: + metadata = self._get_metadata() + # NOTE(MthwRobinson) - the 
attribute check is for backward compatibility + # with unstructured<0.4.9. The metadata attributed was added in 0.4.9. + if hasattr(element, "metadata"): + metadata.update(element.metadata.to_dict()) + if hasattr(element, "category"): + metadata["category"] = element.category + docs.append(Document(page_content=str(element), metadata=metadata)) + elif self.mode == "single": + metadata = self._get_metadata() + text = "\n\n".join([str(el) for el in elements]) + docs = [Document(page_content=text, metadata=metadata)] + else: + raise ValueError(f"mode of {self.mode} not supported.") + return docs + + +class UnstructuredFileLoader(UnstructuredBaseLoader): + """Loader that uses unstructured to load files.""" + + def __init__( + self, file_path: str, mode: str = "single", **unstructured_kwargs: Any + ): + """Initialize with file path.""" + self.file_path = file_path + super().__init__(mode=mode, **unstructured_kwargs) + + def _get_elements(self) -> List: + from unstructured.partition.auto import partition + + return partition(filename=self.file_path, **self.unstructured_kwargs) + + def _get_metadata(self) -> dict: + return {"source": self.file_path} + + +class UnstructuredFileIOLoader(UnstructuredBaseLoader): + """Loader that uses unstructured to load file IO objects.""" + + def __init__(self, file: IO, mode: str = "single", **unstructured_kwargs: Any): + """Initialize with file path.""" + self.file = file + super().__init__(mode=mode, **unstructured_kwargs) + + def _get_elements(self) -> List: + from unstructured.partition.auto import partition + + return partition(file=self.file, **self.unstructured_kwargs) + + def _get_metadata(self) -> dict: + return {} diff --git a/langchain/document_loaders/url.py b/langchain/document_loaders/url.py new file mode 100644 index 0000000000000000000000000000000000000000..1b3328496ed96f982bb58d80e6fec6152216e256 --- /dev/null +++ b/langchain/document_loaders/url.py @@ -0,0 +1,32 @@ +"""Loader that uses unstructured to load HTML files.""" +from typing import List + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + + +class UnstructuredURLLoader(BaseLoader): + """Loader that uses unstructured to load HTML files.""" + + def __init__(self, urls: List[str]): + """Initialize with file path.""" + try: + import unstructured # noqa:F401 + except ImportError: + raise ValueError( + "unstructured package not found, please install it with " + "`pip install unstructured`" + ) + self.urls = urls + + def load(self) -> List[Document]: + """Load file.""" + from unstructured.partition.html import partition_html + + docs: List[Document] = list() + for url in self.urls: + elements = partition_html(url=url) + text = "\n\n".join([str(el) for el in elements]) + metadata = {"source": url} + docs.append(Document(page_content=text, metadata=metadata)) + return docs diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py new file mode 100644 index 0000000000000000000000000000000000000000..90616a8cff2683012da00b9b1eb34ed321f4a505 --- /dev/null +++ b/langchain/document_loaders/web_base.py @@ -0,0 +1,60 @@ +"""Web base loader class.""" +import logging +from typing import Any, List, Optional + +import requests + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + +logger = logging.getLogger(__file__) + +default_header_template = { + "User-Agent": "", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*" + ";q=0.8", + 
"Accept-Language": "en-US,en;q=0.5", + "Referer": "https://www.google.com/", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", +} + + +class WebBaseLoader(BaseLoader): + """Loader that uses urllib and beautiful soup to load webpages.""" + + def __init__(self, web_path: str, header_template: Optional[dict] = None): + """Initialize with webpage path.""" + self.web_path = web_path + self.session = requests.Session() + + try: + from fake_useragent import UserAgent + + headers = header_template or default_header_template + headers["User-Agent"] = UserAgent().random + self.session.headers = dict(headers) + except ImportError: + logger.info( + "fake_useragent not found, using default user agent." + "To get a realistic header for requests, `pip install fake_useragent`." + ) + + def _scrape(self, url: str) -> Any: + from bs4 import BeautifulSoup + + html_doc = self.session.get(url) + soup = BeautifulSoup(html_doc.text, "html.parser") + return soup + + def scrape(self) -> Any: + """Scrape data from webpage and return it in BeautifulSoup format.""" + return self._scrape(self.web_path) + + def load(self) -> List[Document]: + """Load data into document objects.""" + soup = self.scrape() + text = soup.get_text() + metadata = {"source": self.web_path} + return [Document(page_content=text, metadata=metadata)] diff --git a/langchain/document_loaders/word_document.py b/langchain/document_loaders/word_document.py new file mode 100644 index 0000000000000000000000000000000000000000..dd9909421ffbf82d456354a7c8ff29ef69cf1306 --- /dev/null +++ b/langchain/document_loaders/word_document.py @@ -0,0 +1,43 @@ +"""Loader that loads word documents.""" +import os +from typing import List + +from langchain.document_loaders.unstructured import UnstructuredFileLoader + + +class UnstructuredWordDocumentLoader(UnstructuredFileLoader): + """Loader that uses unstructured to load word documents.""" + + def _get_elements(self) -> List: + from unstructured.__version__ import __version__ as __unstructured_version__ + from unstructured.file_utils.filetype import FileType, detect_filetype + + unstructured_version = tuple( + [int(x) for x in __unstructured_version__.split(".")] + ) + # NOTE(MthwRobinson) - magic will raise an import error if the libmagic + # system dependency isn't installed. If it's not installed, we'll just + # check the file extension + try: + import magic # noqa: F401 + + is_doc = detect_filetype(self.file_path) == FileType.DOC + except ImportError: + _, extension = os.path.splitext(self.file_path) + is_doc = extension == ".doc" + + if is_doc and unstructured_version < (0, 4, 11): + raise ValueError( + f"You are on unstructured version {__unstructured_version__}. " + "Partitioning .doc files is only supported in unstructured>=0.4.11. " + "Please upgrade the unstructured package and try again." 
+ ) + + if is_doc: + from unstructured.partition.doc import partition_doc + + return partition_doc(filename=self.file_path, **self.unstructured_kwargs) + else: + from unstructured.partition.docx import partition_docx + + return partition_docx(filename=self.file_path, **self.unstructured_kwargs) diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py new file mode 100644 index 0000000000000000000000000000000000000000..03c74022a24a60755cd64a56bb75dffd265f9616 --- /dev/null +++ b/langchain/document_loaders/youtube.py @@ -0,0 +1,325 @@ +"""Loader that loads YouTube transcript.""" +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, List, Optional + +from pydantic import root_validator +from pydantic.dataclasses import dataclass + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader + +SCOPES = ["https://www.googleapis.com/auth/drive.readonly"] + + +@dataclass +class GoogleApiClient: + """A Generic Google Api Client. + + To use, you should have the ``google_auth_oauthlib,youtube_transcript_api,google`` + python package installed. + As the google api expects credentials you need to set up a google account and + register your Service. "https://developers.google.com/docs/api/quickstart/python" + + + + Example: + .. code-block:: python + + from langchain.document_loaders import GoogleApiClient + google_api_client = GoogleApiClient( + service_account_path=Path("path_to_your_sec_file.json") + ) + + """ + + credentials_path: Path = Path.home() / ".credentials" / "credentials.json" + service_account_path: Path = Path.home() / ".credentials" / "credentials.json" + token_path: Path = Path.home() / ".credentials" / "token.json" + + def __post_init__(self) -> None: + self.creds = self._load_credentials() + + @root_validator + def validate_channel_or_videoIds_is_set( + cls, values: Dict[str, Any] + ) -> Dict[str, Any]: + """Validate that either folder_id or document_ids is set, but not both.""" + + if not values.get("credentials_path") and not values.get( + "service_account_path" + ): + raise ValueError("Must specify either channel_name or video_ids") + return values + + def _load_credentials(self) -> Any: + """Load credentials.""" + # Adapted from https://developers.google.com/drive/api/v3/quickstart/python + try: + from google.auth.transport.requests import Request + from google.oauth2 import service_account + from google.oauth2.credentials import Credentials + from google_auth_oauthlib.flow import InstalledAppFlow + from youtube_transcript_api import YouTubeTranscriptApi # noqa: F401 + except ImportError: + raise ImportError( + "You must run" + "`pip install --upgrade " + "google-api-python-client google-auth-httplib2 " + "google-auth-oauthlib" + "youtube-transcript-api`" + "to use the Google Drive loader" + ) + + creds = None + if self.service_account_path.exists(): + return service_account.Credentials.from_service_account_file( + str(self.service_account_path) + ) + if self.token_path.exists(): + creds = Credentials.from_authorized_user_file(str(self.token_path), SCOPES) + + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + str(self.credentials_path), SCOPES + ) + creds = flow.run_local_server(port=0) + with open(self.token_path, "w") as token: + token.write(creds.to_json()) + + return creds + + +class YoutubeLoader(BaseLoader): + """Loader that loads 
Youtube transcripts."""
+
+    def __init__(
+        self, video_id: str, add_video_info: bool = False, language: str = "en"
+    ):
+        """Initialize with YouTube video ID."""
+        self.video_id = video_id
+        self.add_video_info = add_video_info
+        self.language = language
+
+    @classmethod
+    def from_youtube_channel(cls, youtube_url: str, **kwargs: Any) -> YoutubeLoader:
+        """Parse the video ID out of a YouTube watch URL and construct a loader."""
+        video_id = youtube_url.split("youtube.com/watch?v=")[-1]
+        return cls(video_id, **kwargs)
+
+    def load(self) -> List[Document]:
+        """Load documents."""
+        try:
+            from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi
+        except ImportError:
+            raise ImportError(
+                "Could not import youtube_transcript_api python package. "
+                "Please install it with `pip install youtube-transcript-api`."
+            )
+
+        metadata = {"source": self.video_id}
+
+        if self.add_video_info:
+            # Get more video meta info
+            # Such as title, description, thumbnail url, publish_date
+            video_info = self._get_video_info()
+            metadata.update(video_info)
+
+        transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
+        try:
+            transcript = transcript_list.find_transcript([self.language])
+        except NoTranscriptFound:
+            en_transcript = transcript_list.find_transcript(["en"])
+            transcript = en_transcript.translate(self.language)
+
+        transcript_pieces = transcript.fetch()
+
+        transcript = " ".join([t["text"].strip(" ") for t in transcript_pieces])
+
+        return [Document(page_content=transcript, metadata=metadata)]
+
+    def _get_video_info(self) -> dict:
+        """Get important video information.
+
+        Components are:
+            - title
+            - description
+            - thumbnail url
+            - publish_date
+            - channel_author
+            - and more.
+        """
+        try:
+            from pytube import YouTube
+
+        except ImportError:
+            raise ImportError(
+                "Could not import pytube python package. "
+                "Please install it with `pip install pytube`."
+            )
+        yt = YouTube(f"https://www.youtube.com/watch?v={self.video_id}")
+        video_info = {
+            "title": yt.title,
+            "description": yt.description,
+            "view_count": yt.views,
+            "thumbnail_url": yt.thumbnail_url,
+            "publish_date": yt.publish_date,
+            "length": yt.length,
+            "author": yt.author,
+        }
+        return video_info
+
+
+@dataclass
+class GoogleApiYoutubeLoader(BaseLoader):
+    """Loader that loads all videos from a channel.
+
+    To use, you should have the ``googleapiclient`` and ``youtube_transcript_api``
+    python packages installed.
+    As the service needs a google_api_client, you first have to initialize
+    the GoogleApiClient.
+
+    Additionally you have to either provide a channel name or a list of video ids
+    "https://developers.google.com/docs/api/quickstart/python"
+
+
+
+    Example:
+        .. code-block:: python
+
+            from langchain.document_loaders import GoogleApiClient
+            from langchain.document_loaders import GoogleApiYoutubeLoader
+            google_api_client = GoogleApiClient(
+                service_account_path=Path("path_to_your_sec_file.json")
+            )
+            loader = GoogleApiYoutubeLoader(
+                google_api_client=google_api_client,
+                channel_name="CodeAesthetic"
+            )
+            loader.load()
+
+    """
+
+    google_api_client: GoogleApiClient
+    channel_name: Optional[str] = None
+    video_ids: Optional[List[str]] = None
+    add_video_info: bool = True
+    captions_language: str = "en"
+
+    def __post_init__(self) -> None:
+        self.youtube_client = self._build_youtube_client(self.google_api_client.creds)
+
+    def _build_youtube_client(self, creds: Any) -> Any:
+        try:
+            from googleapiclient.discovery import build
+            from youtube_transcript_api import YouTubeTranscriptApi  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "You must run "
+                "`pip install --upgrade "
+                "google-api-python-client google-auth-httplib2 "
+                "google-auth-oauthlib "
+                "youtube-transcript-api` "
+                "to use the Google YouTube loader"
+            )
+
+        return build("youtube", "v3", credentials=creds)
+
+    @root_validator
+    def validate_channel_or_videoIds_is_set(
+        cls, values: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Validate that either channel_name or video_ids is set, but not both."""
+        if not values.get("channel_name") and not values.get("video_ids"):
+            raise ValueError("Must specify either channel_name or video_ids")
+        return values
+
+    def _get_transcripe_for_video_id(self, video_id: str) -> str:
+        from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi
+
+        # look up transcripts for the single video id passed in
+        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        try:
+            transcript = transcript_list.find_transcript([self.captions_language])
+        except NoTranscriptFound:
+            en_transcript = transcript_list.find_transcript(["en"])
+            transcript = en_transcript.translate(self.captions_language)
+
+        transcript_pieces = transcript.fetch()
+        return " ".join([t["text"].strip(" ") for t in transcript_pieces])
+
+    def _get_document_for_video_id(self, video_id: str, **kwargs: Any) -> Document:
+        captions = self._get_transcripe_for_video_id(video_id)
+        video_response = (
+            self.youtube_client.videos()
+            .list(
+                part="id,snippet",
+                id=video_id,
+            )
+            .execute()
+        )
+        return Document(
+            page_content=captions,
+            metadata=video_response.get("items")[0],
+        )
+
+    def _get_channel_id(self, channel_name: str) -> str:
+        request = self.youtube_client.search().list(
+            part="id",
+            q=channel_name,
+            type="channel",
+            maxResults=1,  # we only need one result since channel names are unique
+        )
+        response = request.execute()
+        channel_id = response["items"][0]["id"]["channelId"]
+        return channel_id
+
+    def _get_document_for_channel(self, channel: str, **kwargs: Any) -> List[Document]:
+        channel_id = self._get_channel_id(channel)
+        request = self.youtube_client.search().list(
+            part="id,snippet",
+            channelId=channel_id,
+            maxResults=50,  # adjust this value to retrieve more or fewer videos
+        )
+        video_ids = []
+        while request is not None:
+            response = request.execute()
+
+            # Add each video ID to the list
+            for item in response["items"]:
+                if not item["id"].get("videoId"):
+                    continue
+                meta_data = {"videoId": item["id"]["videoId"]}
+                if self.add_video_info:
+                    item["snippet"].pop("thumbnails")
+                    meta_data.update(item["snippet"])
+                video_ids.append(
+                    Document(
+                        page_content=self._get_transcripe_for_video_id(
+                            item["id"]["videoId"]
+                        ),
+                        metadata=meta_data,
+                    )
+                )
+            request =
self.youtube_client.search().list_next(request, response) + + return video_ids + + def load(self) -> List[Document]: + """Load documents.""" + document_list = [] + if self.channel_name: + document_list.extend(self._get_document_for_channel(self.channel_name)) + elif self.video_ids: + document_list.extend( + [ + self._get_document_for_video_id(video_id) + for video_id in self.video_ids + ] + ) + else: + raise ValueError("Must specify either channel_name or video_ids") + return document_list diff --git a/langchain/embeddings/__init__.py b/langchain/embeddings/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..acfdb1418bf41b01ac8c6124dd2b3f06fac4257f --- /dev/null +++ b/langchain/embeddings/__init__.py @@ -0,0 +1,57 @@ +"""Wrappers around embedding modules.""" +import logging +from typing import Any + +from langchain.embeddings.cohere import CohereEmbeddings +from langchain.embeddings.fake import FakeEmbeddings +from langchain.embeddings.huggingface import ( + HuggingFaceEmbeddings, + HuggingFaceInstructEmbeddings, +) +from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings +from langchain.embeddings.self_hosted import SelfHostedEmbeddings +from langchain.embeddings.self_hosted_hugging_face import ( + SelfHostedHuggingFaceEmbeddings, + SelfHostedHuggingFaceInstructEmbeddings, +) +from langchain.embeddings.tensorflow_hub import TensorflowHubEmbeddings + +logger = logging.getLogger(__name__) + +__all__ = [ + "OpenAIEmbeddings", + "HuggingFaceEmbeddings", + "CohereEmbeddings", + "HuggingFaceHubEmbeddings", + "TensorflowHubEmbeddings", + "SagemakerEndpointEmbeddings", + "HuggingFaceInstructEmbeddings", + "SelfHostedEmbeddings", + "SelfHostedHuggingFaceEmbeddings", + "SelfHostedHuggingFaceInstructEmbeddings", + "FakeEmbeddings", +] + + +# TODO: this is in here to maintain backwards compatibility +class HypotheticalDocumentEmbedder: + def __init__(self, *args: Any, **kwargs: Any): + logger.warning( + "Using a deprecated class. Please use " + "`from langchain.chains import HypotheticalDocumentEmbedder` instead" + ) + from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H + + return H(*args, **kwargs) # type: ignore + + @classmethod + def from_llm(cls, *args: Any, **kwargs: Any) -> Any: + logger.warning( + "Using a deprecated class. 
Please use " + "`from langchain.chains import HypotheticalDocumentEmbedder` instead" + ) + from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H + + return H.from_llm(*args, **kwargs) diff --git a/langchain/embeddings/__pycache__/__init__.cpython-39.pyc b/langchain/embeddings/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1301b0ab385de3ac5d14035ed323dae06a2a215 Binary files /dev/null and b/langchain/embeddings/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/base.cpython-39.pyc b/langchain/embeddings/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4e246961cd61197e12cba345f552490a56ea795 Binary files /dev/null and b/langchain/embeddings/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/cohere.cpython-39.pyc b/langchain/embeddings/__pycache__/cohere.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fecf0611f1caddebae81a0f3f010c601c37d0f6d Binary files /dev/null and b/langchain/embeddings/__pycache__/cohere.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/fake.cpython-39.pyc b/langchain/embeddings/__pycache__/fake.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b871d4b7a792182007abcfd8a693ad38ec06680 Binary files /dev/null and b/langchain/embeddings/__pycache__/fake.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/huggingface.cpython-39.pyc b/langchain/embeddings/__pycache__/huggingface.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..982ca2d0f1746d98282eadcbdcce50dab6c91f18 Binary files /dev/null and b/langchain/embeddings/__pycache__/huggingface.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/huggingface_hub.cpython-39.pyc b/langchain/embeddings/__pycache__/huggingface_hub.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..61b4bcd77f0a7bbb3bf096b8ac3bcf03c713fb47 Binary files /dev/null and b/langchain/embeddings/__pycache__/huggingface_hub.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/openai.cpython-39.pyc b/langchain/embeddings/__pycache__/openai.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26f63d5aa625bd6f295fe028804dfc7d4f463474 Binary files /dev/null and b/langchain/embeddings/__pycache__/openai.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/sagemaker_endpoint.cpython-39.pyc b/langchain/embeddings/__pycache__/sagemaker_endpoint.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b84bd9a36a54949f001369e05a06803be923693 Binary files /dev/null and b/langchain/embeddings/__pycache__/sagemaker_endpoint.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/self_hosted.cpython-39.pyc b/langchain/embeddings/__pycache__/self_hosted.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaede28293d464be96182f0533c95acc7f2ed7fd Binary files /dev/null and b/langchain/embeddings/__pycache__/self_hosted.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/self_hosted_hugging_face.cpython-39.pyc b/langchain/embeddings/__pycache__/self_hosted_hugging_face.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ff5a76bb02d42323a8fca90ca64201f1155d7b3 Binary files /dev/null and 
b/langchain/embeddings/__pycache__/self_hosted_hugging_face.cpython-39.pyc differ diff --git a/langchain/embeddings/__pycache__/tensorflow_hub.cpython-39.pyc b/langchain/embeddings/__pycache__/tensorflow_hub.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42dad2eb71cebecc2f1093fdbdd4588aad9b95fc Binary files /dev/null and b/langchain/embeddings/__pycache__/tensorflow_hub.cpython-39.pyc differ diff --git a/langchain/embeddings/base.py b/langchain/embeddings/base.py new file mode 100644 index 0000000000000000000000000000000000000000..4a56cd6acb8b3d5dce2dc9dbeea950bb95e32467 --- /dev/null +++ b/langchain/embeddings/base.py @@ -0,0 +1,15 @@ +"""Interface for embedding models.""" +from abc import ABC, abstractmethod +from typing import List + + +class Embeddings(ABC): + """Interface for embedding models.""" + + @abstractmethod + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Embed search docs.""" + + @abstractmethod + def embed_query(self, text: str) -> List[float]: + """Embed query text.""" diff --git a/langchain/embeddings/cohere.py b/langchain/embeddings/cohere.py new file mode 100644 index 0000000000000000000000000000000000000000..b5dda307c3ab9fe966fed1f8a8cbbd651d13bd00 --- /dev/null +++ b/langchain/embeddings/cohere.py @@ -0,0 +1,81 @@ +"""Wrapper around Cohere embedding models.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env + + +class CohereEmbeddings(BaseModel, Embeddings): + """Wrapper around Cohere embedding models. + + To use, you should have the ``cohere`` python package installed, and the + environment variable ``COHERE_API_KEY`` set with your API key or pass it + as a named parameter to the constructor. + + Example: + .. code-block:: python + + from langchain.embeddings import CohereEmbeddings + cohere = CohereEmbeddings(model="medium", cohere_api_key="my-api-key") + """ + + client: Any #: :meta private: + model: str = "large" + """Model name to use.""" + + truncate: Optional[str] = None + """Truncate embeddings that are too long from start or end ("NONE"|"START"|"END")""" + + cohere_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + cohere_api_key = get_from_dict_or_env( + values, "cohere_api_key", "COHERE_API_KEY" + ) + try: + import cohere + + values["client"] = cohere.Client(cohere_api_key) + except ImportError: + raise ValueError( + "Could not import cohere python package. " + "Please it install it with `pip install cohere`." + ) + return values + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Call out to Cohere's embedding endpoint. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + embeddings = self.client.embed( + model=self.model, texts=texts, truncate=self.truncate + ).embeddings + return [list(map(float, e)) for e in embeddings] + + def embed_query(self, text: str) -> List[float]: + """Call out to Cohere's embedding endpoint. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + embedding = self.client.embed( + model=self.model, texts=[text], truncate=self.truncate + ).embeddings[0] + return list(map(float, embedding)) diff --git a/langchain/embeddings/fake.py b/langchain/embeddings/fake.py new file mode 100644 index 0000000000000000000000000000000000000000..9328f927e261f1f97e32fbebe1c913b4277d3bf3 --- /dev/null +++ b/langchain/embeddings/fake.py @@ -0,0 +1,19 @@ +from typing import List + +import numpy as np +from pydantic import BaseModel + +from langchain.embeddings.base import Embeddings + + +class FakeEmbeddings(Embeddings, BaseModel): + size: int + + def _get_embedding(self) -> List[float]: + return list(np.random.normal(size=self.size)) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + return [self._get_embedding() for _ in texts] + + def embed_query(self, text: str) -> List[float]: + return self._get_embedding() diff --git a/langchain/embeddings/huggingface.py b/langchain/embeddings/huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..bc41fcf3bd7ef25d56bd707722b53b23df6e20b4 --- /dev/null +++ b/langchain/embeddings/huggingface.py @@ -0,0 +1,139 @@ +"""Wrapper around HuggingFace embedding models.""" +from typing import Any, List + +from pydantic import BaseModel, Extra + +from langchain.embeddings.base import Embeddings + +DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" +DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" +DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " +DEFAULT_QUERY_INSTRUCTION = ( + "Represent the question for retrieving supporting documents: " +) + + +class HuggingFaceEmbeddings(BaseModel, Embeddings): + """Wrapper around sentence_transformers embedding models. + + To use, you should have the ``sentence_transformers`` python package installed. + + Example: + .. code-block:: python + + from langchain.embeddings import HuggingFaceEmbeddings + model_name = "sentence-transformers/all-mpnet-base-v2" + hf = HuggingFaceEmbeddings(model_name=model_name) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_MODEL_NAME + """Model name to use.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + import sentence_transformers + + self.client = sentence_transformers.SentenceTransformer(self.model_name) + except ImportError: + raise ValueError( + "Could not import sentence_transformers python package. " + "Please install it with `pip install sentence_transformers`." + ) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace transformer model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + texts = list(map(lambda x: x.replace("\n", " "), texts)) + embeddings = self.client.encode(texts) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + text = text.replace("\n", " ") + embedding = self.client.encode(text) + return embedding.tolist() + + +class HuggingFaceInstructEmbeddings(BaseModel, Embeddings): + """Wrapper around sentence_transformers embedding models. + + To use, you should have the ``sentence_transformers`` + and ``InstructorEmbedding`` python package installed. 
+ + Example: + .. code-block:: python + + from langchain.embeddings import HuggingFaceInstructEmbeddings + model_name = "hkunlp/instructor-large" + hf = HuggingFaceInstructEmbeddings(model_name=model_name) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_INSTRUCT_MODEL + """Model name to use.""" + embed_instruction: str = DEFAULT_EMBED_INSTRUCTION + """Instruction to use for embedding documents.""" + query_instruction: str = DEFAULT_QUERY_INSTRUCTION + """Instruction to use for embedding query.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + from InstructorEmbedding import INSTRUCTOR + + self.client = INSTRUCTOR(self.model_name) + except ImportError as e: + raise ValueError("Dependencies for InstructorEmbedding not found.") from e + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace instruct model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + instruction_pairs = [[self.embed_instruction, text] for text in texts] + embeddings = self.client.encode(instruction_pairs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace instruct model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + instruction_pair = [self.query_instruction, text] + embedding = self.client.encode([instruction_pair])[0] + return embedding.tolist() diff --git a/langchain/embeddings/huggingface_hub.py b/langchain/embeddings/huggingface_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..66c662f0554b570275f78a783aea6a7e07784b1a --- /dev/null +++ b/langchain/embeddings/huggingface_hub.py @@ -0,0 +1,105 @@ +"""Wrapper around HuggingFace Hub embedding models.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env + +DEFAULT_REPO_ID = "sentence-transformers/all-mpnet-base-v2" +VALID_TASKS = ("feature-extraction",) + + +class HuggingFaceHubEmbeddings(BaseModel, Embeddings): + """Wrapper around HuggingFaceHub embedding models. + + To use, you should have the ``huggingface_hub`` python package installed, and the + environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Example: + .. 
code-block:: python + + from langchain.embeddings import HuggingFaceHubEmbeddings + repo_id = "sentence-transformers/all-mpnet-base-v2" + hf = HuggingFaceHubEmbeddings( + repo_id=repo_id, + task="feature-extraction", + huggingfacehub_api_token="my-api-key", + ) + """ + + client: Any #: :meta private: + repo_id: str = DEFAULT_REPO_ID + """Model name to use.""" + task: Optional[str] = "feature-extraction" + """Task to call the model with.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + + huggingfacehub_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + try: + from huggingface_hub.inference_api import InferenceApi + + repo_id = values["repo_id"] + if not repo_id.startswith("sentence-transformers"): + raise ValueError( + "Currently only 'sentence-transformers' embedding models " + f"are supported. Got invalid 'repo_id' {repo_id}." + ) + client = InferenceApi( + repo_id=repo_id, + token=huggingfacehub_api_token, + task=values.get("task"), + ) + if client.task not in VALID_TASKS: + raise ValueError( + f"Got invalid task {client.task}, " + f"currently only {VALID_TASKS} are supported" + ) + values["client"] = client + except ImportError: + raise ValueError( + "Could not import huggingface_hub python package. " + "Please it install it with `pip install huggingface_hub`." + ) + return values + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Call out to HuggingFaceHub's embedding endpoint for embedding search docs. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + # replace newlines, which can negatively affect performance. + texts = [text.replace("\n", " ") for text in texts] + _model_kwargs = self.model_kwargs or {} + responses = self.client(inputs=texts, params=_model_kwargs) + return responses + + def embed_query(self, text: str) -> List[float]: + """Call out to HuggingFaceHub's embedding endpoint for embedding query text. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
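As a usage aside for the Hub wrapper above: the validator only accepts `sentence-transformers/...` repositories and the `feature-extraction` task, and the token can be read from `HUGGINGFACEHUB_API_TOKEN` or passed explicitly. A short sketch (the token value is a placeholder):

from langchain.embeddings import HuggingFaceHubEmbeddings

# repo_id must start with "sentence-transformers" and task must be
# "feature-extraction", per the root validator above.
hub_embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/all-mpnet-base-v2",
    task="feature-extraction",
    huggingfacehub_api_token="hf_...",  # placeholder token
)

vectors = hub_embeddings.embed_documents(["LangChain wraps the Inference API here"])
query_vector = hub_embeddings.embed_query("an example query")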
+ """ + response = self.embed_documents([text])[0] + return response diff --git a/langchain/embeddings/openai.py b/langchain/embeddings/openai.py new file mode 100644 index 0000000000000000000000000000000000000000..20351fd43972b33f314f361557c4ddd684267471 --- /dev/null +++ b/langchain/embeddings/openai.py @@ -0,0 +1,272 @@ +"""Wrapper around OpenAI embedding models.""" +from __future__ import annotations + +import logging +from typing import Any, Callable, Dict, List, Optional + +import numpy as np +from pydantic import BaseModel, Extra, root_validator +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +def _create_retry_decorator(embeddings: OpenAIEmbeddings) -> Callable[[Any], Any]: + import openai + + min_seconds = 4 + max_seconds = 10 + # Wait 2^x * 1 second between each retry starting with + # 4 seconds, then up to 10 seconds, then 10 seconds afterwards + return retry( + reraise=True, + stop=stop_after_attempt(embeddings.max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=( + retry_if_exception_type(openai.error.Timeout) + | retry_if_exception_type(openai.error.APIError) + | retry_if_exception_type(openai.error.APIConnectionError) + | retry_if_exception_type(openai.error.RateLimitError) + | retry_if_exception_type(openai.error.ServiceUnavailableError) + ), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + + +def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any: + """Use tenacity to retry the completion call.""" + retry_decorator = _create_retry_decorator(embeddings) + + @retry_decorator + def _completion_with_retry(**kwargs: Any) -> Any: + return embeddings.client.create(**kwargs) + + return _completion_with_retry(**kwargs) + + +class OpenAIEmbeddings(BaseModel, Embeddings): + """Wrapper around OpenAI embedding models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key or pass it + as a named parameter to the constructor. + + Example: + .. code-block:: python + + from langchain.embeddings import OpenAIEmbeddings + openai = OpenAIEmbeddings(openai_api_key="my-api-key") + + In order to use the library with Microsoft Azure endpoints, you need to set + the OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_KEY and optionally and + API_VERSION. + The OPENAI_API_TYPE must be set to 'azure' and the others correspond to + the properties of your endpoint. + In addition, the deployment name must be passed as the model parameter. + + Example: + .. code-block:: python + + import os + os.environ["OPENAI_API_TYPE"] = "azure" + os.environ["OPENAI_API_BASE"] = "https:// Dict: + # model_name is for first generation, and model is for second generation. + # Both are not allowed together. + if "model_name" in values and "model" in values: + raise ValueError( + "Both `model_name` and `model` were provided, " + "but only one should be." + ) + + """Get model names from just old model name.""" + if "model_name" in values: + if "document_model_name" in values: + raise ValueError( + "Both `model_name` and `document_model_name` were provided, " + "but only one should be." + ) + if "query_model_name" in values: + raise ValueError( + "Both `model_name` and `query_model_name` were provided, " + "but only one should be." 
+ ) + model_name = values.pop("model_name") + values["document_model_name"] = f"text-search-{model_name}-doc-001" + values["query_model_name"] = f"text-search-{model_name}-query-001" + + # Set document/query model names from model parameter. + if "model" in values: + if "document_model_name" in values: + raise ValueError( + "Both `model` and `document_model_name` were provided, " + "but only one should be." + ) + if "query_model_name" in values: + raise ValueError( + "Both `model` and `query_model_name` were provided, " + "but only one should be." + ) + model = values.get("model") + values["document_model_name"] = model + values["query_model_name"] = model + + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + values["client"] = openai.Embedding + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + return values + + # please refer to + # https://github.com/openai/openai-cookbook/blob/main/examples/Embedding_long_inputs.ipynb + def _get_len_safe_embeddings( + self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None + ) -> List[List[float]]: + embeddings: List[List[float]] = [[] for i in range(len(texts))] + try: + import tiktoken + + tokens = [] + indices = [] + encoding = tiktoken.model.encoding_for_model(self.document_model_name) + for i, text in enumerate(texts): + # replace newlines, which can negatively affect performance. + text = text.replace("\n", " ") + token = encoding.encode(text) + for j in range(0, len(token), self.embedding_ctx_length): + tokens += [token[j : j + self.embedding_ctx_length]] + indices += [i] + + batched_embeddings = [] + _chunk_size = chunk_size or self.chunk_size + for i in range(0, len(tokens), _chunk_size): + response = embed_with_retry( + self, + input=tokens[i : i + _chunk_size], + engine=self.document_model_name, + ) + batched_embeddings += [r["embedding"] for r in response["data"]] + + results: List[List[List[float]]] = [[] for i in range(len(texts))] + lens: List[List[int]] = [[] for i in range(len(texts))] + for i in range(len(indices)): + results[indices[i]].append(batched_embeddings[i]) + lens[indices[i]].append(len(batched_embeddings[i])) + + for i in range(len(texts)): + average = np.average(results[i], axis=0, weights=lens[i]) + embeddings[i] = (average / np.linalg.norm(average)).tolist() + + return embeddings + + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to for OpenAIEmbeddings. " + "Please it install it with `pip install tiktoken`." + ) + + def _embedding_func(self, text: str, *, engine: str) -> List[float]: + """Call out to OpenAI's embedding endpoint.""" + # replace newlines, which can negatively affect performance. + if self.embedding_ctx_length > 0: + return self._get_len_safe_embeddings([text], engine=engine)[0] + else: + text = text.replace("\n", " ") + return embed_with_retry(self, input=[text], engine=engine)["data"][0][ + "embedding" + ] + + def embed_documents( + self, texts: List[str], chunk_size: Optional[int] = 0 + ) -> List[List[float]]: + """Call out to OpenAI's embedding endpoint for embedding search docs. + + Args: + texts: The list of texts to embed. + chunk_size: The chunk size of embeddings. 
If None, will use the chunk size + specified by the class. + + Returns: + List of embeddings, one for each text. + """ + # handle large batches of texts + if self.embedding_ctx_length > 0: + return self._get_len_safe_embeddings(texts, engine=self.document_model_name) + else: + results = [] + _chunk_size = chunk_size or self.chunk_size + for i in range(0, len(texts), _chunk_size): + response = embed_with_retry( + self, + input=texts[i : i + _chunk_size], + engine=self.document_model_name, + ) + results += [r["embedding"] for r in response["data"]] + return results + + def embed_query(self, text: str) -> List[float]: + """Call out to OpenAI's embedding endpoint for embedding query text. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + embedding = self._embedding_func(text, engine=self.query_model_name) + return embedding diff --git a/langchain/embeddings/sagemaker_endpoint.py b/langchain/embeddings/sagemaker_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc221873cfa501b3e3c5cf123fc73fbd98ca660 --- /dev/null +++ b/langchain/embeddings/sagemaker_endpoint.py @@ -0,0 +1,194 @@ +"""Wrapper around Sagemaker InvokeEndpoint API.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.embeddings.base import Embeddings +from langchain.llms.sagemaker_endpoint import ContentHandlerBase + + +class SagemakerEndpointEmbeddings(BaseModel, Embeddings): + """Wrapper around custom Sagemaker Inference Endpoints. + + To use, you must supply the endpoint name from your deployed + Sagemaker model & the region where it is deployed. + + To authenticate, the AWS client uses the following methods to + automatically load credentials: + https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + + If a specific credential profile should be used, you must pass + the name of the profile from the ~/.aws/credentials file that is to be used. + + Make sure the credentials / roles used have the required policies to + access the Sagemaker endpoint. + See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html + """ + + """ + Example: + .. code-block:: python + + from langchain.embeddings import SagemakerEndpointEmbeddings + endpoint_name = ( + "my-endpoint-name" + ) + region_name = ( + "us-west-2" + ) + credentials_profile_name = ( + "default" + ) + se = SagemakerEndpointEmbeddings( + endpoint_name=endpoint_name, + region_name=region_name, + credentials_profile_name=credentials_profile_name + ) + """ + client: Any #: :meta private: + + endpoint_name: str = "" + """The name of the endpoint from the deployed Sagemaker model. + Must be unique within an AWS Region.""" + + region_name: str = "" + """The aws region where the Sagemaker model is deployed, eg. `us-west-2`.""" + + credentials_profile_name: Optional[str] = None + """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which + has either access keys or role information specified. + If not specified, the default credential profile or, if on an EC2 instance, + credentials from IMDS will be used. + See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + """ + + content_handler: ContentHandlerBase + """The content handler class that provides an input and + output transform functions to handle formats between LLM + and the endpoint. + """ + + """ + Example: + .. 
code-block:: python + + from langchain.llms.sagemaker_endpoint import ContentHandlerBase + + class ContentHandler(ContentHandlerBase): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: + input_str = json.dumps({prompt: prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json[0]["generated_text"] + """ + + model_kwargs: Optional[Dict] = None + """Key word arguments to pass to the model.""" + + endpoint_kwargs: Optional[Dict] = None + """Optional attributes passed to the invoke_endpoint + function. See `boto3`_. docs for more info. + .. _boto3: + """ + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that AWS credentials to and python package exists in environment.""" + try: + import boto3 + + try: + if values["credentials_profile_name"] is not None: + session = boto3.Session( + profile_name=values["credentials_profile_name"] + ) + else: + # use default credentials + session = boto3.Session() + + values["client"] = session.client( + "sagemaker-runtime", region_name=values["region_name"] + ) + + except Exception as e: + raise ValueError( + "Could not load credentials to authenticate with AWS client. " + "Please check that credentials in the specified " + "profile name are valid." + ) from e + + except ImportError: + raise ValueError( + "Could not import boto3 python package. " + "Please it install it with `pip install boto3`." + ) + return values + + def _embedding_func(self, texts: List[str]) -> List[float]: + """Call out to SageMaker Inference embedding endpoint.""" + # replace newlines, which can negatively affect performance. + texts = list(map(lambda x: x.replace("\n", " "), texts)) + _model_kwargs = self.model_kwargs or {} + _endpoint_kwargs = self.endpoint_kwargs or {} + + body = self.content_handler.transform_input(texts, _model_kwargs) + content_type = self.content_handler.content_type + accepts = self.content_handler.accepts + + # send request + try: + response = self.client.invoke_endpoint( + EndpointName=self.endpoint_name, + Body=body, + ContentType=content_type, + Accept=accepts, + **_endpoint_kwargs, + ) + except Exception as e: + raise ValueError(f"Error raised by inference endpoint: {e}") + + return self.content_handler.transform_output(response["Body"]) + + def embed_documents( + self, texts: List[str], chunk_size: int = 64 + ) -> List[List[float]]: + """Compute doc embeddings using a SageMaker Inference Endpoint. + + Args: + texts: The list of texts to embed. + chunk_size: The chunk size defines how many input texts will + be grouped together as request. If None, will use the + chunk size specified by the class. + + + Returns: + List of embeddings, one for each text. + """ + results = [] + _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size + for i in range(0, len(texts), _chunk_size): + response = self._embedding_func(texts[i : i + _chunk_size]) + results.append(response) + return results + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a SageMaker inference endpoint. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
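The content-handler example earlier in this file is written for a text-generation endpoint; an embeddings endpoint instead has to serialize a batch of texts and pull vectors back out of the response. A hedged sketch, assuming boto3 and AWS credentials are configured; the JSON keys (`text_inputs`, `embedding`) and the endpoint name are assumptions about one particular deployed model, not something this module prescribes:

import json
from typing import Dict, List

from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase


class EmbeddingsContentHandler(ContentHandlerBase):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: List[str], model_kwargs: Dict) -> bytes:
        # _embedding_func passes the batch of texts in as `prompt`.
        return json.dumps({"text_inputs": prompt, **model_kwargs}).encode("utf-8")

    def transform_output(self, output: bytes) -> List[List[float]]:
        # Assumed response shape; adjust to whatever the endpoint actually returns.
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json["embedding"]


embeddings = SagemakerEndpointEmbeddings(
    endpoint_name="my-embeddings-endpoint",      # placeholder endpoint name
    region_name="us-west-2",
    credentials_profile_name="default",
    content_handler=EmbeddingsContentHandler(),
)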
+ """ + return self._embedding_func([text]) diff --git a/langchain/embeddings/self_hosted.py b/langchain/embeddings/self_hosted.py new file mode 100644 index 0000000000000000000000000000000000000000..7e05617e25ddcf55fad33fddf30b2c0517874871 --- /dev/null +++ b/langchain/embeddings/self_hosted.py @@ -0,0 +1,103 @@ +"""Running custom embedding models on self-hosted remote hardware.""" +from typing import Any, Callable, List + +from pydantic import BaseModel, Extra + +from langchain.embeddings.base import Embeddings +from langchain.llms import SelfHostedPipeline + + +def _embed_documents(pipeline: Any, *args: Any, **kwargs: Any) -> List[List[float]]: + """Inference function to send to the remote hardware. + + Accepts a sentence_transformer model_id and + returns a list of embeddings for each document in the batch. + """ + return pipeline(*args, **kwargs) + + +class SelfHostedEmbeddings(SelfHostedPipeline, Embeddings, BaseModel): + """Runs custom embedding models on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another + cloud like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example using a model load function: + .. code-block:: python + + from langchain.embeddings import SelfHostedEmbeddings + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + import runhouse as rh + + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + def get_pipeline(): + model_id = "facebook/bart-large" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + return pipeline("feature-extraction", model=model, tokenizer=tokenizer) + embeddings = SelfHostedEmbeddings( + model_load_fn=get_pipeline, + hardware=gpu + model_reqs=["./", "torch", "transformers"], + ) + Example passing in a pipeline path: + .. code-block:: python + + from langchain.embeddings import SelfHostedHFEmbeddings + import runhouse as rh + from transformers import pipeline + + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + pipeline = pipeline(model="bert-base-uncased", task="feature-extraction") + rh.blob(pickle.dumps(pipeline), + path="models/pipeline.pkl").save().to(gpu, path="models") + embeddings = SelfHostedHFEmbeddings.from_pipeline( + pipeline="models/pipeline.pkl", + hardware=gpu, + model_reqs=["./", "torch", "transformers"], + ) + """ + + inference_fn: Callable = _embed_documents + """Inference function to extract the embeddings on the remote hardware.""" + inference_kwargs: Any = None + """Any kwargs to pass to the model's inference function.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace transformer model. + + Args: + texts: The list of texts to embed.s + + Returns: + List of embeddings, one for each text. + """ + texts = list(map(lambda x: x.replace("\n", " "), texts)) + embeddings = self.client(self.pipeline_ref, texts) + if not isinstance(embeddings, list): + return embeddings.tolist() + return embeddings + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + text = text.replace("\n", " ") + embeddings = self.client(self.pipeline_ref, text) + if not isinstance(embeddings, list): + return embeddings.tolist() + return embeddings diff --git a/langchain/embeddings/self_hosted_hugging_face.py b/langchain/embeddings/self_hosted_hugging_face.py new file mode 100644 index 0000000000000000000000000000000000000000..7675d1e4a04448f5e0ccfc6660fbfe5fc8b1be73 --- /dev/null +++ b/langchain/embeddings/self_hosted_hugging_face.py @@ -0,0 +1,171 @@ +"""Wrapper around HuggingFace embedding models for self-hosted remote hardware.""" +import importlib +import logging +from typing import Any, Callable, List, Optional + +from pydantic import BaseModel + +from langchain.embeddings.self_hosted import SelfHostedEmbeddings + +DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" +DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" +DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " +DEFAULT_QUERY_INSTRUCTION = ( + "Represent the question for retrieving supporting documents: " +) + +logger = logging.getLogger(__name__) + + +def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]: + """Inference function to send to the remote hardware. + + Accepts a sentence_transformer model_id and + returns a list of embeddings for each document in the batch. + """ + return client.encode(*args, **kwargs) + + +def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0) -> Any: + """Load the embedding model.""" + if not instruct: + import sentence_transformers + + client = sentence_transformers.SentenceTransformer(model_id) + else: + from InstructorEmbedding import INSTRUCTOR + + client = INSTRUCTOR(model_id) + + if importlib.util.find_spec("torch") is not None: + import torch + + cuda_device_count = torch.cuda.device_count() + if device < -1 or (device >= cuda_device_count): + raise ValueError( + f"Got device=={device}, " + f"device is required to be within [-1, {cuda_device_count})" + ) + if device < 0 and cuda_device_count > 0: + logger.warning( + "Device has %d GPUs available. " + "Provide device={deviceId} to `from_model_id` to use available" + "GPUs for execution. deviceId is -1 for CPU and " + "can be a positive integer associated with CUDA device id.", + cuda_device_count, + ) + + client = client.to(device) + return client + + +class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings, BaseModel): + """Runs sentence_transformers embedding models on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another cloud + like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example: + .. 
code-block:: python + + from langchain.embeddings import SelfHostedHuggingFaceEmbeddings + import runhouse as rh + model_name = "sentence-transformers/all-mpnet-base-v2" + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + hf = SelfHostedHuggingFaceEmbeddings(model_name=model_name, hardware=gpu) + """ + + client: Any #: :meta private: + model_id: str = DEFAULT_MODEL_NAME + """Model name to use.""" + model_reqs: List[str] = ["./", "sentence_transformers", "torch"] + """Requirements to install on hardware to inference the model.""" + hardware: Any + """Remote hardware to send the inference function to.""" + model_load_fn: Callable = load_embedding_model + """Function to load the model remotely on the server.""" + load_fn_kwargs: Optional[dict] = None + """Key word arguments to pass to the model load function.""" + inference_fn: Callable = _embed_documents + """Inference function to extract the embeddings.""" + + def __init__(self, **kwargs: Any): + """Initialize the remote inference function.""" + load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) + load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME) + load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False) + load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) + super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) + + +class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings): + """Runs InstructorEmbedding embedding models on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another + cloud like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example: + .. code-block:: python + + from langchain.embeddings import SelfHostedHuggingFaceInstructEmbeddings + import runhouse as rh + model_name = "hkunlp/instructor-large" + gpu = rh.cluster(name='rh-a10x', instance_type='A100:1') + hf = SelfHostedHuggingFaceInstructEmbeddings( + model_name=model_name, hardware=gpu) + """ + + model_id: str = DEFAULT_INSTRUCT_MODEL + """Model name to use.""" + embed_instruction: str = DEFAULT_EMBED_INSTRUCTION + """Instruction to use for embedding documents.""" + query_instruction: str = DEFAULT_QUERY_INSTRUCTION + """Instruction to use for embedding query.""" + model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"] + """Requirements to install on hardware to inference the model.""" + + def __init__(self, **kwargs: Any): + """Initialize the remote inference function.""" + load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) + load_fn_kwargs["model_id"] = load_fn_kwargs.get( + "model_id", DEFAULT_INSTRUCT_MODEL + ) + load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True) + load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) + super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace instruct model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + instruction_pairs = [] + for text in texts: + instruction_pairs.append([self.embed_instruction, text]) + embeddings = self.client(self.pipeline_ref, instruction_pairs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace instruct model. 
+ + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + instruction_pair = [self.query_instruction, text] + embedding = self.client(self.pipeline_ref, [instruction_pair])[0] + return embedding.tolist() diff --git a/langchain/embeddings/tensorflow_hub.py b/langchain/embeddings/tensorflow_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..25e63949c4df7f65512744b8e19de92ea8766396 --- /dev/null +++ b/langchain/embeddings/tensorflow_hub.py @@ -0,0 +1,70 @@ +"""Wrapper around TensorflowHub embedding models.""" +from typing import Any, List + +from pydantic import BaseModel, Extra + +from langchain.embeddings.base import Embeddings + +DEFAULT_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" + + +class TensorflowHubEmbeddings(BaseModel, Embeddings): + """Wrapper around tensorflow_hub embedding models. + + To use, you should have the ``tensorflow_text`` python package installed. + + Example: + .. code-block:: python + + from langchain.embeddings import TensorflowHubEmbeddings + url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" + tf = TensorflowHubEmbeddings(model_url=url) + """ + + embed: Any #: :meta private: + model_url: str = DEFAULT_MODEL_URL + """Model name to use.""" + + def __init__(self, **kwargs: Any): + """Initialize the tensorflow_hub and tensorflow_text.""" + super().__init__(**kwargs) + try: + import tensorflow_hub + import tensorflow_text # noqa + + self.embed = tensorflow_hub.load(self.model_url) + except ImportError as e: + raise ValueError( + "Could not import some python packages." "Please install them." + ) from e + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a TensorflowHub embedding model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + texts = list(map(lambda x: x.replace("\n", " "), texts)) + embeddings = self.embed(texts).numpy() + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a TensorflowHub embedding model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
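For completeness, a short sketch of calling the TensorFlow Hub wrapper below once both `tensorflow_hub` and `tensorflow_text` are installed; with no `model_url` it falls back to the multilingual universal-sentence-encoder default, which is downloaded on first use:

from langchain.embeddings import TensorflowHubEmbeddings

# Uses DEFAULT_MODEL_URL (universal-sentence-encoder-multilingual/3) when no
# model_url is given.
tf_embeddings = TensorflowHubEmbeddings()

doc_vectors = tf_embeddings.embed_documents(["first text", "second text"])
query_vector = tf_embeddings.embed_query("a query")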
+ """ + text = text.replace("\n", " ") + embedding = self.embed(text).numpy()[0] + return embedding.tolist() diff --git a/langchain/evaluation/__init__.py b/langchain/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4714192ab272c77ee5ba926268b98932632d09cc --- /dev/null +++ b/langchain/evaluation/__init__.py @@ -0,0 +1 @@ +"""[BETA] Functionality relating to evaluation.""" diff --git a/langchain/evaluation/loading.py b/langchain/evaluation/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..613e261303bbfeeccb63eaa8ec5bd26bfb7b6afb --- /dev/null +++ b/langchain/evaluation/loading.py @@ -0,0 +1,8 @@ +from typing import Dict, List + + +def load_dataset(uri: str) -> List[Dict]: + from datasets import load_dataset + + dataset = load_dataset(f"LangChainDatasets/{uri}") + return [d for d in dataset["train"]] diff --git a/langchain/evaluation/qa/__init__.py b/langchain/evaluation/qa/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..728633169f23b740cc3dffb20a51d02386345f0b --- /dev/null +++ b/langchain/evaluation/qa/__init__.py @@ -0,0 +1,5 @@ +"""Chains and utils related to evaluating question answering functionality.""" +from langchain.evaluation.qa.eval_chain import QAEvalChain +from langchain.evaluation.qa.generate_chain import QAGenerateChain + +__all__ = ["QAEvalChain", "QAGenerateChain"] diff --git a/langchain/evaluation/qa/eval_chain.py b/langchain/evaluation/qa/eval_chain.py new file mode 100644 index 0000000000000000000000000000000000000000..382f9f553cd7e0181dcf1da66a810230f771551c --- /dev/null +++ b/langchain/evaluation/qa/eval_chain.py @@ -0,0 +1,60 @@ +"""LLM Chain specifically for evaluating question answering.""" +from __future__ import annotations + +from typing import Any, List + +from langchain import PromptTemplate +from langchain.chains.llm import LLMChain +from langchain.evaluation.qa.eval_prompt import PROMPT +from langchain.llms.base import BaseLLM + + +class QAEvalChain(LLMChain): + """LLM Chain specifically for evaluating question answering.""" + + @classmethod + def from_llm( + cls, llm: BaseLLM, prompt: PromptTemplate = PROMPT, **kwargs: Any + ) -> QAEvalChain: + """Load QA Eval Chain from LLM. + + Args: + llm (BaseLLM): the base language model to use. + + prompt (PromptTemplate): A prompt template containing the input_variables: + 'input', 'answer' and 'result' that will be used as the prompt + for evaluation. + Defaults to PROMPT. + + **kwargs: additional keyword arguments. + + Returns: + QAEvalChain: the loaded QA eval chain. 
+ """ + expected_input_vars = {"query", "answer", "result"} + if expected_input_vars != set(prompt.input_variables): + raise ValueError( + f"Input variables should be {expected_input_vars}, " + f"but got {prompt.input_variables}" + ) + return cls(llm=llm, prompt=prompt, **kwargs) + + def evaluate( + self, + examples: List[dict], + predictions: List[dict], + question_key: str = "query", + answer_key: str = "answer", + prediction_key: str = "result", + ) -> List[dict]: + """Evaluate question answering examples and predictions.""" + inputs = [ + { + "query": example[question_key], + "answer": example[answer_key], + "result": predictions[i][prediction_key], + } + for i, example in enumerate(examples) + ] + + return self.apply(inputs) diff --git a/langchain/evaluation/qa/eval_prompt.py b/langchain/evaluation/qa/eval_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..4e6c7c6a73978ad7b94426c70948effee5c1d1e5 --- /dev/null +++ b/langchain/evaluation/qa/eval_prompt.py @@ -0,0 +1,21 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +template = """You are a teacher grading a quiz. +You are given a question, the student's answer, and the true answer, and are asked to score it as either CORRECT or INCORRECT. + +Example Format: +QUESTION: question here +STUDENT ANSWER: student's answer here +TRUE ANSWER: true answer here +GRADE: CORRECT or INCORRECT here + +Please remember to grade them based on being factually accurate. Begin! + +QUESTION: {query} +STUDENT ANSWER: {result} +TRUE ANSWER: {answer} +GRADE:""" +PROMPT = PromptTemplate( + input_variables=["query", "result", "answer"], template=template +) diff --git a/langchain/evaluation/qa/generate_chain.py b/langchain/evaluation/qa/generate_chain.py new file mode 100644 index 0000000000000000000000000000000000000000..6294146259f4461065880ed8a72eaeba1fe60ce2 --- /dev/null +++ b/langchain/evaluation/qa/generate_chain.py @@ -0,0 +1,17 @@ +"""LLM Chain specifically for generating examples for question answering.""" +from __future__ import annotations + +from typing import Any + +from langchain.chains.llm import LLMChain +from langchain.evaluation.qa.generate_prompt import PROMPT +from langchain.llms.base import BaseLLM + + +class QAGenerateChain(LLMChain): + """LLM Chain specifically for generating examples for question answering.""" + + @classmethod + def from_llm(cls, llm: BaseLLM, **kwargs: Any) -> QAGenerateChain: + """Load QA Generate Chain from LLM.""" + return cls(llm=llm, prompt=PROMPT, **kwargs) diff --git a/langchain/evaluation/qa/generate_prompt.py b/langchain/evaluation/qa/generate_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..2fe278cfea71a394f2f3d0ffa0c2cdfa8263d0b5 --- /dev/null +++ b/langchain/evaluation/qa/generate_prompt.py @@ -0,0 +1,25 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate +from langchain.output_parsers.regex import RegexParser + +template = """You are a teacher coming up with questions to ask on a quiz. +Given the following document, please generate a question and answer based on that document. + +Example Format: + +... + +QUESTION: question here +ANSWER: answer here + +These questions should be detailed and be based explicitly on information in the document. Begin! 
+ + +{doc} +""" +output_parser = RegexParser( + regex=r"QUESTION: (.*?)\nANSWER: (.*)", output_keys=["query", "answer"] +) +PROMPT = PromptTemplate( + input_variables=["doc"], template=template, output_parser=output_parser +) diff --git a/langchain/example_generator.py b/langchain/example_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..7c309d050a2849fc50b73c825ff9a9a77087c1a1 --- /dev/null +++ b/langchain/example_generator.py @@ -0,0 +1,23 @@ +"""Utility functions for working with prompts.""" +from typing import List + +from langchain.chains.llm import LLMChain +from langchain.llms.base import BaseLLM +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate + +TEST_GEN_TEMPLATE_SUFFIX = "Add another example." + + +def generate_example( + examples: List[dict], llm: BaseLLM, prompt_template: PromptTemplate +) -> str: + """Return another example given a list of examples for a prompt.""" + prompt = FewShotPromptTemplate( + examples=examples, + suffix=TEST_GEN_TEMPLATE_SUFFIX, + input_variables=[], + example_prompt=prompt_template, + ) + chain = LLMChain(llm=llm, prompt=prompt) + return chain.predict() diff --git a/langchain/formatting.py b/langchain/formatting.py new file mode 100644 index 0000000000000000000000000000000000000000..61c7c11641b6127d92ab8e0e52f3d63f19879438 --- /dev/null +++ b/langchain/formatting.py @@ -0,0 +1,32 @@ +"""Utilities for formatting strings.""" +from string import Formatter +from typing import Any, Mapping, Sequence, Union + + +class StrictFormatter(Formatter): + """A subclass of formatter that checks for extra keys.""" + + def check_unused_args( + self, + used_args: Sequence[Union[int, str]], + args: Sequence, + kwargs: Mapping[str, Any], + ) -> None: + """Check to see if extra parameters are passed.""" + extra = set(kwargs).difference(used_args) + if extra: + raise KeyError(extra) + + def vformat( + self, format_string: str, args: Sequence, kwargs: Mapping[str, Any] + ) -> str: + """Check that no arguments are provided.""" + if len(args) > 0: + raise ValueError( + "No arguments should be provided, " + "everything should be passed as keyword arguments." 
+ ) + return super().vformat(format_string, args, kwargs) + + +formatter = StrictFormatter() diff --git a/langchain/graphs/__init__.py b/langchain/graphs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..68851c6ddbdf0ced7808fe187a0ee956512acc63 --- /dev/null +++ b/langchain/graphs/__init__.py @@ -0,0 +1,4 @@ +"""Graph implementations.""" +from langchain.graphs.networkx_graph import NetworkxEntityGraph + +__all__ = ["NetworkxEntityGraph"] diff --git a/langchain/graphs/__pycache__/__init__.cpython-39.pyc b/langchain/graphs/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..930402de7ed281469d6b6cdb4f93e03d49304c89 Binary files /dev/null and b/langchain/graphs/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/graphs/__pycache__/networkx_graph.cpython-39.pyc b/langchain/graphs/__pycache__/networkx_graph.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2fe369c35bd152ffd99906436edbe6a66859dc7 Binary files /dev/null and b/langchain/graphs/__pycache__/networkx_graph.cpython-39.pyc differ diff --git a/langchain/graphs/networkx_graph.py b/langchain/graphs/networkx_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..c49a1ad9a616760b989cb8d6b61ca746019866ff --- /dev/null +++ b/langchain/graphs/networkx_graph.py @@ -0,0 +1,124 @@ +"""Networkx wrapper for graph operations.""" +from __future__ import annotations + +from typing import Any, List, NamedTuple, Optional, Tuple + +KG_TRIPLE_DELIMITER = "<|>" + + +class KnowledgeTriple(NamedTuple): + """A triple in the graph.""" + + subject: str + predicate: str + object_: str + + @classmethod + def from_string(cls, triple_string: str) -> "KnowledgeTriple": + """Create a KnowledgeTriple from a string.""" + subject, predicate, object_ = triple_string.strip().split(", ") + subject = subject[1:] + object_ = object_[:-1] + return cls(subject, predicate, object_) + + +def parse_triples(knowledge_str: str) -> List[KnowledgeTriple]: + """Parse knowledge triples from the knowledge string.""" + knowledge_str = knowledge_str.strip() + if not knowledge_str or knowledge_str == "NONE": + return [] + triple_strs = knowledge_str.split(KG_TRIPLE_DELIMITER) + results = [] + for triple_str in triple_strs: + try: + kg_triple = KnowledgeTriple.from_string(triple_str) + except ValueError: + continue + results.append(kg_triple) + return results + + +def get_entities(entity_str: str) -> List[str]: + """Extract entities from entity string.""" + if entity_str.strip() == "NONE": + return [] + else: + return [w.strip() for w in entity_str.split(",")] + + +class NetworkxEntityGraph: + """Networkx wrapper for entity graph operations.""" + + def __init__(self, graph: Optional[Any] = None) -> None: + """Create a new graph.""" + try: + import networkx as nx + except ImportError: + raise ValueError( + "Could not import networkx python package. " + "Please it install it with `pip install networkx`." + ) + if graph is not None: + if not isinstance(graph, nx.DiGraph): + raise ValueError("Passed in graph is not of correct shape") + self._graph = graph + else: + self._graph = nx.DiGraph() + + @classmethod + def from_gml(cls, gml_path: str) -> NetworkxEntityGraph: + try: + import networkx as nx + except ImportError: + raise ValueError( + "Could not import networkx python package. " + "Please it install it with `pip install networkx`." 
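A quick sketch of the knowledge-triple helpers in `langchain/graphs/networkx_graph.py`: `parse_triples` splits an LLM's delimited output into `KnowledgeTriple`s, which can then be loaded into a `NetworkxEntityGraph` (requires `networkx`; the triples here are made up for illustration):

from langchain.graphs import NetworkxEntityGraph
from langchain.graphs.networkx_graph import KG_TRIPLE_DELIMITER, parse_triples

llm_output = f"(Nevada, is a, state){KG_TRIPLE_DELIMITER}(Nevada, is in, US)"

graph = NetworkxEntityGraph()
for triple in parse_triples(llm_output):
    graph.add_triple(triple)

# get_triples() yields (subject, object, predicate) tuples from the edge data.
print(graph.get_triples())
# [('Nevada', 'state', 'is a'), ('Nevada', 'US', 'is in')]
print(graph.get_entity_knowledge("Nevada"))
# ['Nevada is a state', 'Nevada is in US']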
+ ) + graph = nx.read_gml(gml_path) + return cls(graph) + + def add_triple(self, knowledge_triple: KnowledgeTriple) -> None: + """Add a triple to the graph.""" + # Creates nodes if they don't exist + # Overwrites existing edges + if not self._graph.has_node(knowledge_triple.subject): + self._graph.add_node(knowledge_triple.subject) + if not self._graph.has_node(knowledge_triple.object_): + self._graph.add_node(knowledge_triple.object_) + self._graph.add_edge( + knowledge_triple.subject, + knowledge_triple.object_, + relation=knowledge_triple.predicate, + ) + + def delete_triple(self, knowledge_triple: KnowledgeTriple) -> None: + """Delete a triple from the graph.""" + if self._graph.has_edge(knowledge_triple.subject, knowledge_triple.object_): + self._graph.remove_edge(knowledge_triple.subject, knowledge_triple.object_) + + def get_triples(self) -> List[Tuple[str, str, str]]: + """Get all triples in the graph.""" + return [(u, v, d["relation"]) for u, v, d in self._graph.edges(data=True)] + + def get_entity_knowledge(self, entity: str, depth: int = 1) -> List[str]: + """Get information about an entity.""" + import networkx as nx + + # TODO: Have more information-specific retrieval methods + if not self._graph.has_node(entity): + return [] + + results = [] + for src, sink in nx.dfs_edges(self._graph, entity, depth_limit=depth): + relation = self._graph[src][sink]["relation"] + results.append(f"{src} {relation} {sink}") + return results + + def write_to_gml(self, path: str) -> None: + import networkx as nx + + nx.write_gml(self._graph, path) + + def clear(self) -> None: + """Clear the graph.""" + self._graph.clear() diff --git a/langchain/indexes/__init__.py b/langchain/indexes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7e81e4ac88c34797809caf8a4bb2dbc192b8baed --- /dev/null +++ b/langchain/indexes/__init__.py @@ -0,0 +1,5 @@ +"""All index utils.""" +from langchain.indexes.graph import GraphIndexCreator +from langchain.indexes.vectorstore import VectorstoreIndexCreator + +__all__ = ["GraphIndexCreator", "VectorstoreIndexCreator"] diff --git a/langchain/indexes/graph.py b/langchain/indexes/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..519e4ac4aae9172f3e5b7284c6dedd9509e2a99d --- /dev/null +++ b/langchain/indexes/graph.py @@ -0,0 +1,30 @@ +"""Graph Index Creator.""" +from typing import Optional, Type + +from pydantic import BaseModel + +from langchain.chains.llm import LLMChain +from langchain.graphs.networkx_graph import NetworkxEntityGraph, parse_triples +from langchain.indexes.prompts.knowledge_triplet_extraction import ( + KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT, +) +from langchain.llms.base import BaseLLM + + +class GraphIndexCreator(BaseModel): + """Functionality to create graph index.""" + + llm: Optional[BaseLLM] = None + graph_type: Type[NetworkxEntityGraph] = NetworkxEntityGraph + + def from_text(self, text: str) -> NetworkxEntityGraph: + """Create graph index from text.""" + if self.llm is None: + raise ValueError("llm should not be None") + graph = self.graph_type() + chain = LLMChain(llm=self.llm, prompt=KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT) + output = chain.predict(text=text) + knowledge = parse_triples(output) + for triple in knowledge: + graph.add_triple(triple) + return graph diff --git a/langchain/indexes/prompts/__init__.py b/langchain/indexes/prompts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a5833cd2a5f18bb8e3bd4d90784a758b2ba1953 --- /dev/null +++ 
b/langchain/indexes/prompts/__init__.py @@ -0,0 +1 @@ +"""Relevant prompts for constructing indexes.""" diff --git a/langchain/indexes/prompts/entity_extraction.py b/langchain/indexes/prompts/entity_extraction.py new file mode 100644 index 0000000000000000000000000000000000000000..47cc349cb2b3ab5542e2d2b271ebdb66a255749e --- /dev/null +++ b/langchain/indexes/prompts/entity_extraction.py @@ -0,0 +1,40 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_ENTITY_EXTRACTION_TEMPLATE = """You are an AI assistant reading the transcript of a conversation between an AI and a human. Extract all of the proper nouns from the last line of conversation. As a guideline, a proper noun is generally capitalized. You should definitely extract all names and places. + +The conversation history is provided just in case of a coreference (e.g. "What do you know about him" where "him" is defined in a previous line) -- ignore items mentioned there that are not in the last line. + +Return the output as a single comma-separated list, or NONE if there is nothing of note to return (e.g. the user is just issuing a greeting or having a simple conversation). + +EXAMPLE +Conversation history: +Person #1: how's it going today? +AI: "It's going great! How about you?" +Person #1: good! busy working on Langchain. lots to do. +AI: "That sounds like a lot of work! What kind of things are you doing to make Langchain better?" +Last line: +Person #1: i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. +Output: Langchain +END OF EXAMPLE + +EXAMPLE +Conversation history: +Person #1: how's it going today? +AI: "It's going great! How about you?" +Person #1: good! busy working on Langchain. lots to do. +AI: "That sounds like a lot of work! What kind of things are you doing to make Langchain better?" +Last line: +Person #1: i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. I'm working with Person #2. +Output: Langchain, Person #2 +END OF EXAMPLE + +Conversation history (for reference only): +{history} +Last line of conversation (for extraction): +Human: {input} + +Output:""" +ENTITY_EXTRACTION_PROMPT = PromptTemplate( + input_variables=["history", "input"], template=_DEFAULT_ENTITY_EXTRACTION_TEMPLATE +) diff --git a/langchain/indexes/prompts/entity_summarization.py b/langchain/indexes/prompts/entity_summarization.py new file mode 100644 index 0000000000000000000000000000000000000000..41e97f5f62d9704327da2ffbdd883c629c5ba523 --- /dev/null +++ b/langchain/indexes/prompts/entity_summarization.py @@ -0,0 +1,25 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_ENTITY_SUMMARIZATION_TEMPLATE = """You are an AI assistant helping a human keep track of facts about relevant people, places, and concepts in their life. Update the summary of the provided entity in the "Entity" section based on the last line of your conversation with the human. If you are writing the summary for the first time, return a single sentence. +The update should only include facts that are relayed in the last line of conversation about the provided entity, and should only contain facts about the provided entity. + +If there is no new information about the provided entity or the information is not worth noting (not an important or relevant fact to remember long-term), return the existing summary unchanged. 
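The knowledge-triple extraction prompt shown a little further down feeds the `GraphIndexCreator` added above in `langchain/indexes/graph.py`: it runs the text through an LLM chain and loads the parsed triples into a `NetworkxEntityGraph`. A sketch, assuming an OpenAI API key is available and using made-up input text:

from langchain.indexes import GraphIndexCreator
from langchain.llms import OpenAI

index_creator = GraphIndexCreator(llm=OpenAI(temperature=0))

text = "Nevada is a state in the US. It is also the number 1 producer of gold in the US."
graph = index_creator.from_text(text)

print(graph.get_triples())           # e.g. [('Nevada', 'state', 'is a'), ...]
graph.write_to_gml("graph.gml")      # persist; reload later with NetworkxEntityGraph.from_gml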
+ +Full conversation history (for context): +{history} + +Entity to summarize: +{entity} + +Existing summary of {entity}: +{summary} + +Last line of conversation: +Human: {input} +Updated summary:""" + +ENTITY_SUMMARIZATION_PROMPT = PromptTemplate( + input_variables=["entity", "summary", "history", "input"], + template=_DEFAULT_ENTITY_SUMMARIZATION_TEMPLATE, +) diff --git a/langchain/indexes/prompts/knowledge_triplet_extraction.py b/langchain/indexes/prompts/knowledge_triplet_extraction.py new file mode 100644 index 0000000000000000000000000000000000000000..0505965c098c4cf814d2f8289e94f8730d600a55 --- /dev/null +++ b/langchain/indexes/prompts/knowledge_triplet_extraction.py @@ -0,0 +1,37 @@ +# flake8: noqa + +from langchain.graphs.networkx_graph import KG_TRIPLE_DELIMITER +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE = ( + "You are a networked intelligence helping a human track knowledge triples" + " about all relevant people, things, concepts, etc. and integrating" + " them with your knowledge stored within your weights" + " as well as that stored in a knowledge graph." + " Extract all of the knowledge triples from the text." + " A knowledge triple is a clause that contains a subject, a predicate," + " and an object. The subject is the entity being described," + " the predicate is the property of the subject that is being" + " described, and the object is the value of the property.\n\n" + "EXAMPLE\n" + "It's a state in the US. It's also the number 1 producer of gold in the US.\n\n" + f"Output: (Nevada, is a, state){KG_TRIPLE_DELIMITER}(Nevada, is in, US)" + f"{KG_TRIPLE_DELIMITER}(Nevada, is the number 1 producer of, gold)\n" + "END OF EXAMPLE\n\n" + "EXAMPLE\n" + "I'm going to the store.\n\n" + "Output: NONE\n" + "END OF EXAMPLE\n\n" + "EXAMPLE\n" + "Oh huh. 
I know Descartes likes to drive antique scooters and play the mandolin.\n" + f"Output: (Descartes, likes to drive, antique scooters){KG_TRIPLE_DELIMITER}(Descartes, plays, mandolin)\n" + "END OF EXAMPLE\n\n" + "EXAMPLE\n" + "{text}" + "Output:" +) + +KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT = PromptTemplate( + input_variables=["text"], + template=_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE, +) diff --git a/langchain/indexes/vectorstore.py b/langchain/indexes/vectorstore.py new file mode 100644 index 0000000000000000000000000000000000000000..194942fa3238e38bb3706080329b1535fbd4bc11 --- /dev/null +++ b/langchain/indexes/vectorstore.py @@ -0,0 +1,74 @@ +from typing import Any, List, Optional, Type + +from pydantic import BaseModel, Extra, Field + +from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain +from langchain.chains.retrieval_qa.base import RetrievalQA +from langchain.document_loaders.base import BaseLoader +from langchain.embeddings.base import Embeddings +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI +from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.chroma import Chroma + + +def _get_default_text_splitter() -> TextSplitter: + return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + + +class VectorStoreIndexWrapper(BaseModel): + """Wrapper around a vectorstore for easy access.""" + + vectorstore: VectorStore + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def query(self, question: str, llm: Optional[BaseLLM] = None, **kwargs: Any) -> str: + """Query the vectorstore.""" + llm = llm or OpenAI(temperature=0) + chain = RetrievalQA.from_chain_type( + llm, retriever=self.vectorstore.as_retriever(), **kwargs + ) + return chain.run(question) + + def query_with_sources( + self, question: str, llm: Optional[BaseLLM] = None, **kwargs: Any + ) -> dict: + """Query the vectorstore and get back sources.""" + llm = llm or OpenAI(temperature=0) + chain = RetrievalQAWithSourcesChain.from_chain_type( + llm, retriever=self.vectorstore.as_retriever(), **kwargs + ) + return chain({chain.question_key: question}) + + +class VectorstoreIndexCreator(BaseModel): + """Logic for creating indexes.""" + + vectorstore_cls: Type[VectorStore] = Chroma + embedding: Embeddings = Field(default_factory=OpenAIEmbeddings) + text_splitter: TextSplitter = Field(default_factory=_get_default_text_splitter) + vectorstore_kwargs: dict = Field(default_factory=dict) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper: + """Create a vectorstore index from loaders.""" + docs = [] + for loader in loaders: + docs.extend(loader.load()) + sub_docs = self.text_splitter.split_documents(docs) + vectorstore = self.vectorstore_cls.from_documents( + sub_docs, self.embedding, **self.vectorstore_kwargs + ) + return VectorStoreIndexWrapper(vectorstore=vectorstore) diff --git a/langchain/input.py b/langchain/input.py new file mode 100644 index 0000000000000000000000000000000000000000..7f054bb9cd486d9ce83b0927cd6019267256416e --- /dev/null +++ b/langchain/input.py @@ -0,0 +1,36 @@ +"""Handle chained inputs.""" +from typing import Dict, List, Optional + +_TEXT_COLOR_MAPPING = 
{ + "blue": "36;1", + "yellow": "33;1", + "pink": "38;5;200", + "green": "32;1", + "red": "31;1", +} + + +def get_color_mapping( + items: List[str], excluded_colors: Optional[List] = None +) -> Dict[str, str]: + """Get mapping for items to a support color.""" + colors = list(_TEXT_COLOR_MAPPING.keys()) + if excluded_colors is not None: + colors = [c for c in colors if c not in excluded_colors] + color_mapping = {item: colors[i % len(colors)] for i, item in enumerate(items)} + return color_mapping + + +def get_colored_text(text: str, color: str) -> str: + """Get colored text.""" + color_str = _TEXT_COLOR_MAPPING[color] + return f"\u001b[{color_str}m\033[1;3m{text}\u001b[0m" + + +def print_text(text: str, color: Optional[str] = None, end: str = "") -> None: + """Print text with highlighting and no end characters.""" + if color is None: + text_to_print = text + else: + text_to_print = get_colored_text(text, color) + print(text_to_print, end=end) diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..06bc999fe6ac010e092ed496865acb413335e3bf --- /dev/null +++ b/langchain/llms/__init__.py @@ -0,0 +1,79 @@ +"""Wrappers on top of large language models APIs.""" +from typing import Dict, Type + +from langchain.llms.ai21 import AI21 +from langchain.llms.aleph_alpha import AlephAlpha +from langchain.llms.anthropic import Anthropic +from langchain.llms.bananadev import Banana +from langchain.llms.base import BaseLLM +from langchain.llms.cerebriumai import CerebriumAI +from langchain.llms.cohere import Cohere +from langchain.llms.deepinfra import DeepInfra +from langchain.llms.forefrontai import ForefrontAI +from langchain.llms.gooseai import GooseAI +from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint +from langchain.llms.huggingface_hub import HuggingFaceHub +from langchain.llms.huggingface_pipeline import HuggingFacePipeline +from langchain.llms.modal import Modal +from langchain.llms.nlpcloud import NLPCloud +from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat +from langchain.llms.petals import Petals +from langchain.llms.promptlayer_openai import PromptLayerOpenAI, PromptLayerOpenAIChat +from langchain.llms.sagemaker_endpoint import SagemakerEndpoint +from langchain.llms.self_hosted import SelfHostedPipeline +from langchain.llms.self_hosted_hugging_face import SelfHostedHuggingFaceLLM +from langchain.llms.stochasticai import StochasticAI +from langchain.llms.writer import Writer + +__all__ = [ + "Anthropic", + "AlephAlpha", + "Banana", + "CerebriumAI", + "Cohere", + "DeepInfra", + "ForefrontAI", + "GooseAI", + "Modal", + "NLPCloud", + "OpenAI", + "OpenAIChat", + "Petals", + "HuggingFaceEndpoint", + "HuggingFaceHub", + "SagemakerEndpoint", + "HuggingFacePipeline", + "AI21", + "AzureOpenAI", + "SelfHostedPipeline", + "SelfHostedHuggingFaceLLM", + "PromptLayerOpenAI", + "PromptLayerOpenAIChat", + "StochasticAI", + "Writer", +] + +type_to_cls_dict: Dict[str, Type[BaseLLM]] = { + "ai21": AI21, + "aleph_alpha": AlephAlpha, + "anthropic": Anthropic, + "bananadev": Banana, + "cerebriumai": CerebriumAI, + "cohere": Cohere, + "deepinfra": DeepInfra, + "forefrontai": ForefrontAI, + "gooseai": GooseAI, + "huggingface_hub": HuggingFaceHub, + "huggingface_endpoint": HuggingFaceEndpoint, + "modal": Modal, + "sagemaker_endpoint": SagemakerEndpoint, + "nlpcloud": NLPCloud, + "openai": OpenAI, + "petals": Petals, + "huggingface_pipeline": HuggingFacePipeline, + "azure": AzureOpenAI, + "self_hosted": 
SelfHostedPipeline, + "self_hosted_hugging_face": SelfHostedHuggingFaceLLM, + "stochasticai": StochasticAI, + "writer": Writer, +} diff --git a/langchain/llms/ai21.py b/langchain/llms/ai21.py new file mode 100644 index 0000000000000000000000000000000000000000..169718e26d1104a4a1e7c68c1f07d78ca03cc7ab --- /dev/null +++ b/langchain/llms/ai21.py @@ -0,0 +1,149 @@ +"""Wrapper around AI21 APIs.""" +from typing import Any, Dict, List, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env + + +class AI21PenaltyData(BaseModel): + """Parameters for AI21 penalty data.""" + + scale: int = 0 + applyToWhitespaces: bool = True + applyToPunctuations: bool = True + applyToNumbers: bool = True + applyToStopwords: bool = True + applyToEmojis: bool = True + + +class AI21(LLM, BaseModel): + """Wrapper around AI21 large language models. + + To use, you should have the environment variable ``AI21_API_KEY`` + set with your API key. + + Example: + ..
code-block:: python + + from langchain.llms import AI21 + ai21 = AI21(model="j1-jumbo") + """ + + model: str = "j1-jumbo" + """Model name to use.""" + + temperature: float = 0.7 + """What sampling temperature to use.""" + + maxTokens: int = 256 + """The maximum number of tokens to generate in the completion.""" + + minTokens: int = 0 + """The minimum number of tokens to generate in the completion.""" + + topP: float = 1.0 + """Total probability mass of tokens to consider at each step.""" + + presencePenalty: AI21PenaltyData = AI21PenaltyData() + """Penalizes repeated tokens.""" + + countPenalty: AI21PenaltyData = AI21PenaltyData() + """Penalizes repeated tokens according to count.""" + + frequencyPenalty: AI21PenaltyData = AI21PenaltyData() + """Penalizes repeated tokens according to frequency.""" + + numResults: int = 1 + """How many completions to generate for each prompt.""" + + logitBias: Optional[Dict[str, float]] = None + """Adjust the probability of specific tokens being generated.""" + + ai21_api_key: Optional[str] = None + + stop: Optional[List[str]] = None + + base_url: Optional[str] = None + """Base url to use, if None decides based on model name.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + ai21_api_key = get_from_dict_or_env(values, "ai21_api_key", "AI21_API_KEY") + values["ai21_api_key"] = ai21_api_key + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling AI21 API.""" + return { + "temperature": self.temperature, + "maxTokens": self.maxTokens, + "minTokens": self.minTokens, + "topP": self.topP, + "presencePenalty": self.presencePenalty.dict(), + "countPenalty": self.countPenalty.dict(), + "frequencyPenalty": self.frequencyPenalty.dict(), + "numResults": self.numResults, + "logitBias": self.logitBias, + } + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return {**{"model": self.model}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "ai21" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to AI21's complete endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = ai21("Tell me a joke.") + """ + if self.stop is not None and stop is not None: + raise ValueError("`stop` found in both the input and default params.") + elif self.stop is not None: + stop = self.stop + elif stop is None: + stop = [] + if self.base_url is not None: + base_url = self.base_url + else: + if self.model in ("j1-grande-instruct",): + base_url = "https://api.ai21.com/studio/v1/experimental" + else: + base_url = "https://api.ai21.com/studio/v1" + response = requests.post( + url=f"{base_url}/{self.model}/complete", + headers={"Authorization": f"Bearer {self.ai21_api_key}"}, + json={"prompt": prompt, "stopSequences": stop, **self._default_params}, + ) + if response.status_code != 200: + optional_detail = response.json().get("error") + raise ValueError( + f"AI21 /complete call failed with status code {response.status_code}." 
+ f" Details: {optional_detail}" + ) + response_json = response.json() + return response_json["completions"][0]["data"]["text"] diff --git a/langchain/llms/aleph_alpha.py b/langchain/llms/aleph_alpha.py new file mode 100644 index 0000000000000000000000000000000000000000..810a8c5891d605b16902388c0e8f2e93ad034c6f --- /dev/null +++ b/langchain/llms/aleph_alpha.py @@ -0,0 +1,236 @@ +"""Wrapper around Aleph Alpha APIs.""" +from typing import Any, Dict, List, Optional, Sequence + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + + +class AlephAlpha(LLM, BaseModel): + """Wrapper around Aleph Alpha large language models. + + To use, you should have the ``aleph_alpha_client`` python package installed, and the + environment variable ``ALEPH_ALPHA_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Parameters are explained more in depth here: + https://github.com/Aleph-Alpha/aleph-alpha-client/blob/c14b7dd2b4325c7da0d6a119f6e76385800e097b/aleph_alpha_client/completion.py#L10 + + Example: + .. code-block:: python + + from langchain.llms import AlephAlpha + alpeh_alpha = AlephAlpha(aleph_alpha_api_key="my-api-key") + """ + + client: Any #: :meta private: + model: Optional[str] = "luminous-base" + """Model name to use.""" + + maximum_tokens: int = 64 + """The maximum number of tokens to be generated.""" + + temperature: float = 0.0 + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: int = 0 + """Number of most likely tokens to consider at each step.""" + + top_p: float = 0.0 + """Total probability mass of tokens to consider at each step.""" + + presence_penalty: float = 0.0 + """Penalizes repeated tokens.""" + + frequency_penalty: float = 0.0 + """Penalizes repeated tokens according to frequency.""" + + repetition_penalties_include_prompt: Optional[bool] = False + """Flag deciding whether presence penalty or frequency penalty are + updated from the prompt.""" + + use_multiplicative_presence_penalty: Optional[bool] = False + """Flag deciding whether presence penalty is applied + multiplicatively (True) or additively (False).""" + + penalty_bias: Optional[str] = None + """Penalty bias for the completion.""" + + penalty_exceptions: Optional[List[str]] = None + """List of strings that may be generated without penalty, + regardless of other penalty settings""" + + penalty_exceptions_include_stop_sequences: Optional[bool] = None + """Should stop_sequences be included in penalty_exceptions.""" + + best_of: Optional[int] = None + """returns the one with the "best of" results + (highest log probability per token) + """ + + n: int = 1 + """How many completions to generate for each prompt.""" + + logit_bias: Optional[Dict[int, float]] = None + """The logit bias allows to influence the likelihood of generating tokens.""" + + log_probs: Optional[int] = None + """Number of top log probabilities to be returned for each generated token.""" + + tokens: Optional[bool] = False + """return tokens of completion.""" + + disable_optimizations: Optional[bool] = False + + minimum_tokens: Optional[int] = 0 + """Generate at least this number of tokens.""" + + echo: bool = False + """Echo the prompt in the completion.""" + + use_multiplicative_frequency_penalty: bool = False + + sequence_penalty: float = 0.0 + + sequence_penalty_min_length: int = 2 + + use_multiplicative_sequence_penalty: bool = False + + 
completion_bias_inclusion: Optional[Sequence[str]] = None + + completion_bias_inclusion_first_token_only: bool = False + + completion_bias_exclusion: Optional[Sequence[str]] = None + + completion_bias_exclusion_first_token_only: bool = False + """Only consider the first token for the completion_bias_exclusion.""" + + contextual_control_threshold: Optional[float] = None + """If set to None, attention control parameters only apply to those tokens that have + explicitly been set in the request. + If set to a non-None value, control parameters are also applied to similar tokens. + """ + + control_log_additive: Optional[bool] = True + """True: apply control by adding the log(control_factor) to attention scores. + False: (attention_scores - attention_scores.min(-1)) * control_factor + """ + + repetition_penalties_include_completion: bool = True + """Flag deciding whether presence penalty or frequency penalty + are updated from the completion.""" + + raw_completion: bool = False + """Force the raw completion of the model to be returned.""" + + aleph_alpha_api_key: Optional[str] = None + """API key for Aleph Alpha API.""" + + stop_sequences: Optional[List[str]] = None + """Stop sequences to use.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + aleph_alpha_api_key = get_from_dict_or_env( + values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY" + ) + try: + import aleph_alpha_client + + values["client"] = aleph_alpha_client.Client(token=aleph_alpha_api_key) + except ImportError: + raise ValueError( + "Could not import aleph_alpha_client python package. " + "Please install it with `pip install aleph_alpha_client`."
+ ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling the Aleph Alpha API.""" + return { + "maximum_tokens": self.maximum_tokens, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "n": self.n, + "repetition_penalties_include_prompt": self.repetition_penalties_include_prompt, # noqa: E501 + "use_multiplicative_presence_penalty": self.use_multiplicative_presence_penalty, # noqa: E501 + "penalty_bias": self.penalty_bias, + "penalty_exceptions": self.penalty_exceptions, + "penalty_exceptions_include_stop_sequences": self.penalty_exceptions_include_stop_sequences, # noqa: E501 + "best_of": self.best_of, + "logit_bias": self.logit_bias, + "log_probs": self.log_probs, + "tokens": self.tokens, + "disable_optimizations": self.disable_optimizations, + "minimum_tokens": self.minimum_tokens, + "echo": self.echo, + "use_multiplicative_frequency_penalty": self.use_multiplicative_frequency_penalty, # noqa: E501 + "sequence_penalty": self.sequence_penalty, + "sequence_penalty_min_length": self.sequence_penalty_min_length, + "use_multiplicative_sequence_penalty": self.use_multiplicative_sequence_penalty, # noqa: E501 + "completion_bias_inclusion": self.completion_bias_inclusion, + "completion_bias_inclusion_first_token_only": self.completion_bias_inclusion_first_token_only, # noqa: E501 + "completion_bias_exclusion": self.completion_bias_exclusion, + "completion_bias_exclusion_first_token_only": self.completion_bias_exclusion_first_token_only, # noqa: E501 + "contextual_control_threshold": self.contextual_control_threshold, + "control_log_additive": self.control_log_additive, + "repetition_penalties_include_completion": self.repetition_penalties_include_completion, # noqa: E501 + "raw_completion": self.raw_completion, + } + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return {**{"model": self.model}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "alpeh_alpha" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Aleph Alpha's completion endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = alpeh_alpha("Tell me a joke.") + """ + from aleph_alpha_client import CompletionRequest, Prompt + + params = self._default_params + if self.stop_sequences is not None and stop is not None: + raise ValueError( + "stop sequences found in both the input and default params." + ) + elif self.stop_sequences is not None: + params["stop_sequences"] = self.stop_sequences + else: + params["stop_sequences"] = stop + request = CompletionRequest(prompt=Prompt.from_text(prompt), **params) + response = self.client.complete(model=self.model, request=request) + text = response.completions[0].completion + # If stop tokens are provided, Aleph Alpha's endpoint returns them. + # In order to make this consistent with other endpoints, we strip them. 
+ if stop is not None or self.stop_sequences is not None: + text = enforce_stop_tokens(text, params["stop_sequences"]) + return text diff --git a/langchain/llms/anthropic.py b/langchain/llms/anthropic.py new file mode 100644 index 0000000000000000000000000000000000000000..a5c57a94c6dacf8fa700e12165e8fd15e46fdeef --- /dev/null +++ b/langchain/llms/anthropic.py @@ -0,0 +1,184 @@ +"""Wrapper around Anthropic APIs.""" +import re +from typing import Any, Dict, Generator, List, Mapping, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env + + +class Anthropic(LLM, BaseModel): + r"""Wrapper around Anthropic large language models. + + To use, you should have the ``anthropic`` python package installed, and the + environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Example: + .. code-block:: python + import anthropic + from langchain.llms import Anthropic + model = Anthropic(model="", anthropic_api_key="my-api-key") + + # Simplest invocation, automatically wrapped with HUMAN_PROMPT + # and AI_PROMPT. + response = model("What are the biggest risks facing humanity?") + + # Or if you want to use the chat mode, build a few-shot-prompt, or + # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: + raw_prompt = "What are the biggest risks facing humanity?" + prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" + response = model(prompt) + """ + + client: Any #: :meta private: + model: str = "claude-v1" + """Model name to use.""" + + max_tokens_to_sample: int = 256 + """Denotes the number of tokens to predict per generation.""" + + temperature: float = 1.0 + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: int = 0 + """Number of most likely tokens to consider at each step.""" + + top_p: float = 1 + """Total probability mass of tokens to consider at each step.""" + + anthropic_api_key: Optional[str] = None + + HUMAN_PROMPT: Optional[str] = None + AI_PROMPT: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + anthropic_api_key = get_from_dict_or_env( + values, "anthropic_api_key", "ANTHROPIC_API_KEY" + ) + try: + import anthropic + + values["client"] = anthropic.Client(anthropic_api_key) + values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT + values["AI_PROMPT"] = anthropic.AI_PROMPT + except ImportError: + raise ValueError( + "Could not import anthropic python package. " + "Please it install it with `pip install anthropic`." 
+ ) + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Anthropic API.""" + return { + "max_tokens_to_sample": self.max_tokens_to_sample, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + } + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model": self.model}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "anthropic" + + def _wrap_prompt(self, prompt: str) -> str: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if prompt.startswith(self.HUMAN_PROMPT): + return prompt # Already wrapped. + + # Guard against common errors in specifying wrong number of newlines. + corrected_prompt, n_subs = re.subn(r"^\n*Human:", self.HUMAN_PROMPT, prompt) + if n_subs == 1: + return corrected_prompt + + # As a last resort, wrap the prompt ourselves to emulate instruct-style. + return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n" + + def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if stop is None: + stop = [] + + # Never want model to invent new turns of Human / Assistant dialog. + stop.extend([self.HUMAN_PROMPT, self.AI_PROMPT]) + + return stop + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + r"""Call out to Anthropic's completion endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + prompt = "What are the biggest risks facing humanity?" + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + response = model(prompt) + + """ + stop = self._get_anthropic_stop(stop) + response = self.client.completion( + model=self.model, + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **self._default_params, + ) + text = response["completion"] + return text + + def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator: + r"""Call Anthropic completion_stream and return the resulting generator. + + BETA: this is a beta feature while we figure out the right abstraction. + Once that happens, this interface could change. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + A generator representing the stream of tokens from Anthropic. + + Example: + .. code-block:: python + + + prompt = "Write a poem about a stream." 
+ prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + generator = anthropic.stream(prompt) + for token in generator: + yield token + """ + stop = self._get_anthropic_stop(stop) + return self.client.completion_stream( + model=self.model, + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **self._default_params, + ) diff --git a/langchain/llms/bananadev.py b/langchain/llms/bananadev.py new file mode 100644 index 0000000000000000000000000000000000000000..ae8fa262a0a3a4bed70123fdfb6e8ed6535d5e86 --- /dev/null +++ b/langchain/llms/bananadev.py @@ -0,0 +1,117 @@ +"""Wrapper around Banana API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class Banana(LLM, BaseModel): + """Wrapper around Banana large language models. + + To use, you should have the ``banana-dev`` python package installed, + and the environment variable ``BANANA_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + from langchain.llms import Banana + banana = Banana(model_key="") + """ + + model_key: str = "" + """model endpoint to use""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + banana_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + banana_api_key = get_from_dict_or_env( + values, "banana_api_key", "BANANA_API_KEY" + ) + values["banana_api_key"] = banana_api_key + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_key": self.model_key}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "banana" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call to Banana endpoint.""" + try: + import banana_dev as banana + except ImportError: + raise ValueError( + "Could not import banana-dev python package. " + "Please install it with `pip install banana-dev`." + ) + params = self.model_kwargs or {} + api_key = self.banana_api_key + model_key = self.model_key + model_inputs = { + # a json specific to your model. 
+ "prompt": prompt, + **params, + } + response = banana.run(api_key, model_key, model_inputs) + try: + text = response["modelOutputs"][0]["output"] + except (KeyError, TypeError): + returned = response["modelOutputs"][0] + raise ValueError( + "Response should be of schema: {'output': 'text'}." + f"\nResponse was: {returned}" + "\nTo fix this:" + "\n- fork the source repo of the Banana model" + "\n- modify app.py to return the above schema" + "\n- deploy that as a custom repo" + ) + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/base.py b/langchain/llms/base.py new file mode 100644 index 0000000000000000000000000000000000000000..45bc2db4fa630157ba5f1e50dac5968422c0bd6f --- /dev/null +++ b/langchain/llms/base.py @@ -0,0 +1,336 @@ +"""Base interface for large language models to expose.""" +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, Union + +import yaml +from pydantic import BaseModel, Extra, Field, validator + +import langchain +from langchain.callbacks import get_callback_manager +from langchain.callbacks.base import BaseCallbackManager +from langchain.schema import BaseLanguageModel, Generation, LLMResult, PromptValue + + +def _get_verbosity() -> bool: + return langchain.verbose + + +def get_prompts( + params: Dict[str, Any], prompts: List[str] +) -> Tuple[Dict[int, List], str, List[int], List[str]]: + """Get prompts that are already cached.""" + llm_string = str(sorted([(k, v) for k, v in params.items()])) + missing_prompts = [] + missing_prompt_idxs = [] + existing_prompts = {} + for i, prompt in enumerate(prompts): + if langchain.llm_cache is not None: + cache_val = langchain.llm_cache.lookup(prompt, llm_string) + if isinstance(cache_val, list): + existing_prompts[i] = cache_val + else: + missing_prompts.append(prompt) + missing_prompt_idxs.append(i) + return existing_prompts, llm_string, missing_prompt_idxs, missing_prompts + + +def update_cache( + existing_prompts: Dict[int, List], + llm_string: str, + missing_prompt_idxs: List[int], + new_results: LLMResult, + prompts: List[str], +) -> Optional[dict]: + """Update the cache and get the LLM output.""" + for i, result in enumerate(new_results.generations): + existing_prompts[missing_prompt_idxs[i]] = result + prompt = prompts[missing_prompt_idxs[i]] + if langchain.llm_cache is not None: + langchain.llm_cache.update(prompt, llm_string, result) + llm_output = new_results.llm_output + return llm_output + + +class BaseLLM(BaseLanguageModel, BaseModel, ABC): + """LLM wrapper should take in a prompt and return a string.""" + + cache: Optional[bool] = None + verbose: bool = Field(default_factory=_get_verbosity) + """Whether to print out response text.""" + callback_manager: BaseCallbackManager = Field(default_factory=get_callback_manager) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @validator("callback_manager", pre=True, always=True) + def set_callback_manager( + cls, callback_manager: Optional[BaseCallbackManager] + ) -> BaseCallbackManager: + """If callback manager is None, set it. + + This allows users to pass in None as callback manager, which is a nice UX. 
+ """ + return callback_manager or get_callback_manager() + + @validator("verbose", pre=True, always=True) + def set_verbose(cls, verbose: Optional[bool]) -> bool: + """If verbose is None, set it. + + This allows users to pass in None as verbose to access the global setting. + """ + if verbose is None: + return _get_verbosity() + else: + return verbose + + @abstractmethod + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompts.""" + + @abstractmethod + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompts.""" + + def generate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + prompt_strings = [p.to_string() for p in prompts] + return self.generate(prompt_strings, stop=stop) + + async def agenerate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + prompt_strings = [p.to_string() for p in prompts] + return await self.agenerate(prompt_strings, stop=stop) + + def generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompt and input.""" + # If string is passed in directly no errors will be raised but outputs will + # not make sense. + if not isinstance(prompts, list): + raise ValueError( + "Argument 'prompts' is expected to be of type List[str], received" + f" argument of type {type(prompts)}." + ) + disregard_cache = self.cache is not None and not self.cache + if langchain.llm_cache is None or disregard_cache: + # This happens when langchain.cache is None, but self.cache is True + if self.cache is not None and self.cache: + raise ValueError( + "Asked to cache, but no cache found at `langchain.cache`." + ) + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompts, verbose=self.verbose + ) + try: + output = self._generate(prompts, stop=stop) + except (KeyboardInterrupt, Exception) as e: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + self.callback_manager.on_llm_end(output, verbose=self.verbose) + return output + params = self.dict() + params["stop"] = stop + ( + existing_prompts, + llm_string, + missing_prompt_idxs, + missing_prompts, + ) = get_prompts(params, prompts) + if len(missing_prompts) > 0: + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, missing_prompts, verbose=self.verbose + ) + try: + new_results = self._generate(missing_prompts, stop=stop) + except (KeyboardInterrupt, Exception) as e: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + self.callback_manager.on_llm_end(new_results, verbose=self.verbose) + llm_output = update_cache( + existing_prompts, llm_string, missing_prompt_idxs, new_results, prompts + ) + else: + llm_output = {} + generations = [existing_prompts[i] for i in range(len(prompts))] + return LLMResult(generations=generations, llm_output=llm_output) + + async def agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompt and input.""" + disregard_cache = self.cache is not None and not self.cache + if langchain.llm_cache is None or disregard_cache: + # This happens when langchain.cache is None, but self.cache is True + if self.cache is not None and self.cache: + raise ValueError( + "Asked to cache, but no cache found at `langchain.cache`." 
+ ) + if self.callback_manager.is_async: + await self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompts, verbose=self.verbose + ) + else: + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, prompts, verbose=self.verbose + ) + try: + output = await self._agenerate(prompts, stop=stop) + except (KeyboardInterrupt, Exception) as e: + if self.callback_manager.is_async: + await self.callback_manager.on_llm_error(e, verbose=self.verbose) + else: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + if self.callback_manager.is_async: + await self.callback_manager.on_llm_end(output, verbose=self.verbose) + else: + self.callback_manager.on_llm_end(output, verbose=self.verbose) + return output + params = self.dict() + params["stop"] = stop + ( + existing_prompts, + llm_string, + missing_prompt_idxs, + missing_prompts, + ) = get_prompts(params, prompts) + if len(missing_prompts) > 0: + if self.callback_manager.is_async: + await self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, + missing_prompts, + verbose=self.verbose, + ) + else: + self.callback_manager.on_llm_start( + {"name": self.__class__.__name__}, + missing_prompts, + verbose=self.verbose, + ) + try: + new_results = await self._agenerate(missing_prompts, stop=stop) + except (KeyboardInterrupt, Exception) as e: + if self.callback_manager.is_async: + await self.callback_manager.on_llm_error(e, verbose=self.verbose) + else: + self.callback_manager.on_llm_error(e, verbose=self.verbose) + raise e + if self.callback_manager.is_async: + await self.callback_manager.on_llm_end( + new_results, verbose=self.verbose + ) + else: + self.callback_manager.on_llm_end(new_results, verbose=self.verbose) + llm_output = update_cache( + existing_prompts, llm_string, missing_prompt_idxs, new_results, prompts + ) + else: + llm_output = {} + generations = [existing_prompts[i] for i in range(len(prompts))] + return LLMResult(generations=generations, llm_output=llm_output) + + def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Check Cache and run the LLM on the given prompt and input.""" + return self.generate([prompt], stop=stop).generations[0][0].text + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {} + + def __str__(self) -> str: + """Get a string representation of the object for printing.""" + cls_name = f"\033[1m{self.__class__.__name__}\033[0m" + return f"{cls_name}\nParams: {self._identifying_params}" + + @property + @abstractmethod + def _llm_type(self) -> str: + """Return type of llm.""" + + def dict(self, **kwargs: Any) -> Dict: + """Return a dictionary of the LLM.""" + starter_dict = dict(self._identifying_params) + starter_dict["_type"] = self._llm_type + return starter_dict + + def save(self, file_path: Union[Path, str]) -> None: + """Save the LLM. + + Args: + file_path: Path to file to save the LLM to. + + Example: + .. code-block:: python + + llm.save(file_path="path/llm.yaml") + """ + # Convert file to Path object. 
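BaseLLM exposes two entry points: __call__ takes a single prompt and returns a string, while generate takes a batch of prompts and returns an LLMResult. A small sketch using the FakeListLLM test helper that appears later in this diff, so no API key is needed:

    from langchain.llms.fake import FakeListLLM

    llm = FakeListLLM(responses=["first", "second", "third"])
    print(llm("any prompt"))              # "first": __call__ unwraps the first Generation
    result = llm.generate(["p1", "p2"])   # LLMResult with one list of Generations per prompt
    print(len(result.generations))        # 2
    print(result.generations[0][0].text)  # "second"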
+ if isinstance(file_path, str): + save_path = Path(file_path) + else: + save_path = file_path + + directory_path = save_path.parent + directory_path.mkdir(parents=True, exist_ok=True) + + # Fetch dictionary to save + prompt_dict = self.dict() + + if save_path.suffix == ".json": + with open(file_path, "w") as f: + json.dump(prompt_dict, f, indent=4) + elif save_path.suffix == ".yaml": + with open(file_path, "w") as f: + yaml.dump(prompt_dict, f, default_flow_style=False) + else: + raise ValueError(f"{save_path} must be json or yaml") + + +class LLM(BaseLLM): + """LLM class that expect subclasses to implement a simpler call method. + + The purpose of this class is to expose a simpler interface for working + with LLMs, rather than expect the user to implement the full _generate method. + """ + + @abstractmethod + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Run the LLM on the given prompt and input.""" + + async def _acall(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Run the LLM on the given prompt and input.""" + raise NotImplementedError("Async generation not implemented for this LLM.") + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompt and input.""" + # TODO: add caching here. + generations = [] + for prompt in prompts: + text = self._call(prompt, stop=stop) + generations.append([Generation(text=text)]) + return LLMResult(generations=generations) + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Run the LLM on the given prompt and input.""" + generations = [] + for prompt in prompts: + text = await self._acall(prompt, stop=stop) + generations.append([Generation(text=text)]) + return LLMResult(generations=generations) diff --git a/langchain/llms/cerebriumai.py b/langchain/llms/cerebriumai.py new file mode 100644 index 0000000000000000000000000000000000000000..29f0d2fc21988c39c012fcb1bff21cb8a106c936 --- /dev/null +++ b/langchain/llms/cerebriumai.py @@ -0,0 +1,103 @@ +"""Wrapper around CerebriumAI API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class CerebriumAI(LLM, BaseModel): + """Wrapper around CerebriumAI large language models. + + To use, you should have the ``cerebrium`` python package installed, and the + environment variable ``CEREBRIUMAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. 
code-block:: python + from langchain.llms import CerebriumAI + cerebrium = CerebriumAI(endpoint_url="") + + """ + + endpoint_url: str = "" + """model endpoint to use""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + cerebriumai_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + cerebriumai_api_key = get_from_dict_or_env( + values, "cerebriumai_api_key", "CEREBRIUMAI_API_KEY" + ) + values["cerebriumai_api_key"] = cerebriumai_api_key + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"endpoint_url": self.endpoint_url}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "cerebriumai" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call to CerebriumAI endpoint.""" + try: + from cerebrium import model_api_request + except ImportError: + raise ValueError( + "Could not import cerebrium python package. " + "Please install it with `pip install cerebrium`." + ) + + params = self.model_kwargs or {} + response = model_api_request( + self.endpoint_url, {"prompt": prompt, **params}, self.cerebriumai_api_key + ) + text = response["data"]["result"] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/cohere.py b/langchain/llms/cohere.py new file mode 100644 index 0000000000000000000000000000000000000000..2335dba7648bcd906403e101dc3867403fb3e264 --- /dev/null +++ b/langchain/llms/cohere.py @@ -0,0 +1,132 @@ +"""Wrapper around Cohere APIs.""" +import logging +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class Cohere(LLM, BaseModel): + """Wrapper around Cohere large language models. + + To use, you should have the ``cohere`` python package installed, and the + environment variable ``COHERE_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Example: + .. 
code-block:: python + + from langchain.llms import Cohere + cohere = Cohere(model="gptd-instruct-tft", cohere_api_key="my-api-key") + """ + + client: Any #: :meta private: + model: Optional[str] = None + """Model name to use.""" + + max_tokens: int = 256 + """Denotes the number of tokens to predict per generation.""" + + temperature: float = 0.75 + """A non-negative float that tunes the degree of randomness in generation.""" + + k: int = 0 + """Number of most likely tokens to consider at each step.""" + + p: int = 1 + """Total probability mass of tokens to consider at each step.""" + + frequency_penalty: float = 0.0 + """Penalizes repeated tokens according to frequency. Between 0 and 1.""" + + presence_penalty: float = 0.0 + """Penalizes repeated tokens. Between 0 and 1.""" + + truncate: Optional[str] = None + """Specify how the client handles inputs longer than the maximum token + length: Truncate from START, END or NONE""" + + cohere_api_key: Optional[str] = None + + stop: Optional[List[str]] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + cohere_api_key = get_from_dict_or_env( + values, "cohere_api_key", "COHERE_API_KEY" + ) + try: + import cohere + + values["client"] = cohere.Client(cohere_api_key) + except ImportError: + raise ValueError( + "Could not import cohere python package. " + "Please it install it with `pip install cohere`." + ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling Cohere API.""" + return { + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "k": self.k, + "p": self.p, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "truncate": self.truncate, + } + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return {**{"model": self.model}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "cohere" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Cohere's generate endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = cohere("Tell me a joke.") + """ + params = self._default_params + if self.stop is not None and stop is not None: + raise ValueError("`stop` found in both the input and default params.") + elif self.stop is not None: + params["stop_sequences"] = self.stop + else: + params["stop_sequences"] = stop + + response = self.client.generate(model=self.model, prompt=prompt, **params) + text = response.generations[0].text + # If stop tokens are provided, Cohere's endpoint returns them. + # In order to make this consistent with other endpoints, we strip them. 
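Several of these wrappers fall back to enforce_stop_tokens when the backend does not apply stop sequences server-side, as the comment above notes. A small sketch of the intended effect (assuming the helper truncates the completion at the first stop sequence, which is what the call sites here rely on):

    from langchain.llms.utils import enforce_stop_tokens

    text = "Answer: 42\nHuman: ask me another"
    print(enforce_stop_tokens(text, ["\nHuman:"]))  # "Answer: 42"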
+ if stop is not None or self.stop is not None: + text = enforce_stop_tokens(text, params["stop_sequences"]) + return text diff --git a/langchain/llms/deepinfra.py b/langchain/llms/deepinfra.py new file mode 100644 index 0000000000000000000000000000000000000000..8993a4bf3b920f4a4ec81d3b61c513b69eccbf86 --- /dev/null +++ b/langchain/llms/deepinfra.py @@ -0,0 +1,97 @@ +"""Wrapper around DeepInfra APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +DEFAULT_MODEL_ID = "google/flan-t5-xl" + + +class DeepInfra(LLM, BaseModel): + """Wrapper around DeepInfra deployed models. + + To use, you should have the ``requests`` python package installed, and the + environment variable ``DEEPINFRA_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Only supports `text-generation` and `text2text-generation` for now. + + Example: + .. code-block:: python + + from langchain.llms import DeepInfra + di = DeepInfra(model_id="google/flan-t5-xl", + deepinfra_api_token="my-api-key") + """ + + model_id: str = DEFAULT_MODEL_ID + model_kwargs: Optional[dict] = None + + deepinfra_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + deepinfra_api_token = get_from_dict_or_env( + values, "deepinfra_api_token", "DEEPINFRA_API_TOKEN" + ) + values["deepinfra_api_token"] = deepinfra_api_token + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_id": self.model_id}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "deepinfra" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to DeepInfra's inference API endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
code-block:: python + + response = di("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + + res = requests.post( + f"https://api.deepinfra.com/v1/inference/{self.model_id}", + headers={ + "Authorization": f"bearer {self.deepinfra_api_token}", + "Content-Type": "application/json", + }, + json={"input": prompt, **_model_kwargs}, + ) + + if res.status_code != 200: + raise ValueError("Error raised by inference API") + text = res.json()[0]["generated_text"] + + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/fake.py b/langchain/llms/fake.py new file mode 100644 index 0000000000000000000000000000000000000000..96f766f993474eab8b74cb856eb0bc4a9e857449 --- /dev/null +++ b/langchain/llms/fake.py @@ -0,0 +1,28 @@ +"""Fake LLM wrapper for testing purposes.""" +from typing import Any, List, Mapping, Optional + +from pydantic import BaseModel + +from langchain.llms.base import LLM + + +class FakeListLLM(LLM, BaseModel): + """Fake LLM wrapper for testing purposes.""" + + responses: List + i: int = 0 + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake-list" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """First try to lookup in queries, else return 'foo' or 'bar'.""" + response = self.responses[self.i] + self.i += 1 + return response + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {} diff --git a/langchain/llms/forefrontai.py b/langchain/llms/forefrontai.py new file mode 100644 index 0000000000000000000000000000000000000000..806bcd85454acfb46eaeecbc318a93301abb4a6e --- /dev/null +++ b/langchain/llms/forefrontai.py @@ -0,0 +1,113 @@ +"""Wrapper around ForefrontAI APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + + +class ForefrontAI(LLM, BaseModel): + """Wrapper around ForefrontAI large language models. + + To use, you should have the environment variable ``FOREFRONTAI_API_KEY`` + set with your API key. + + Example: + .. 
code-block:: python + + from langchain.llms import ForefrontAI + forefrontai = ForefrontAI(endpoint_url="") + """ + + endpoint_url: str = "" + """Model name to use.""" + + temperature: float = 0.7 + """What sampling temperature to use.""" + + length: int = 256 + """The maximum number of tokens to generate in the completion.""" + + top_p: float = 1.0 + """Total probability mass of tokens to consider at each step.""" + + top_k: int = 40 + """The number of highest probability vocabulary tokens to + keep for top-k-filtering.""" + + repetition_penalty: int = 1 + """Penalizes repeated tokens according to frequency.""" + + forefrontai_api_key: Optional[str] = None + + base_url: Optional[str] = None + """Base url to use, if None decides based on model name.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + forefrontai_api_key = get_from_dict_or_env( + values, "forefrontai_api_key", "FOREFRONTAI_API_KEY" + ) + values["forefrontai_api_key"] = forefrontai_api_key + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling ForefrontAI API.""" + return { + "temperature": self.temperature, + "length": self.length, + "top_p": self.top_p, + "top_k": self.top_k, + "repetition_penalty": self.repetition_penalty, + } + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"endpoint_url": self.endpoint_url}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "forefrontai" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to ForefrontAI's complete endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = ForefrontAI("Tell me a joke.") + """ + response = requests.post( + url=self.endpoint_url, + headers={ + "Authorization": f"Bearer {self.forefrontai_api_key}", + "Content-Type": "application/json", + }, + json={"text": prompt, **self._default_params}, + ) + response_json = response.json() + text = response_json["result"][0]["completion"] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/gooseai.py b/langchain/llms/gooseai.py new file mode 100644 index 0000000000000000000000000000000000000000..89f17f18d326b3d8cddefe101ad977ed7fe7584b --- /dev/null +++ b/langchain/llms/gooseai.py @@ -0,0 +1,143 @@ +"""Wrapper around GooseAI API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class GooseAI(LLM, BaseModel): + """Wrapper around OpenAI large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``GOOSEAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. 
code-block:: python + from langchain.llms import GooseAI + gooseai = GooseAI(model_name="gpt-neo-20b") + + """ + + client: Any + + model_name: str = "gpt-neo-20b" + """Model name to use""" + + temperature: float = 0.7 + """What sampling temperature to use""" + + max_tokens: int = 256 + """The maximum number of tokens to generate in the completion. + -1 returns as many tokens as possible given the prompt and + the models maximal context size.""" + + top_p: float = 1 + """Total probability mass of tokens to consider at each step.""" + + min_tokens: int = 1 + """The minimum number of tokens to generate in the completion.""" + + frequency_penalty: float = 0 + """Penalizes repeated tokens according to frequency.""" + + presence_penalty: float = 0 + """Penalizes repeated tokens.""" + + n: int = 1 + """How many completions to generate for each prompt.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + + logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict) + """Adjust the probability of specific tokens being generated.""" + + gooseai_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.ignore + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transfered to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + gooseai_api_key = get_from_dict_or_env( + values, "gooseai_api_key", "GOOSEAI_API_KEY" + ) + try: + import openai + + openai.api_key = gooseai_api_key + openai.api_base = "https://api.goose.ai/v1" + values["client"] = openai.Completion + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please install it with `pip install openai`." 
+ ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling GooseAI API.""" + normal_params = { + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, + "min_tokens": self.min_tokens, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "n": self.n, + "logit_bias": self.logit_bias, + } + return {**normal_params, **self.model_kwargs} + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "gooseai" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call the GooseAI API.""" + params = self._default_params + if stop is not None: + if "stop" in params: + raise ValueError("`stop` found in both the input and default params.") + params["stop"] = stop + + response = self.client.create(engine=self.model_name, prompt=prompt, **params) + text = response.choices[0].text + return text diff --git a/langchain/llms/huggingface_endpoint.py b/langchain/llms/huggingface_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..027ff91760481e81149d6c8428bc221607b8e5f1 --- /dev/null +++ b/langchain/llms/huggingface_endpoint.py @@ -0,0 +1,143 @@ +"""Wrapper around HuggingFace APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +VALID_TASKS = ("text2text-generation", "text-generation") + + +class HuggingFaceEndpoint(LLM, BaseModel): + """Wrapper around HuggingFaceHub Inference Endpoints. + + To use, you should have the ``huggingface_hub`` python package installed, and the + environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Only supports `text-generation` and `text2text-generation` for now. + + Example: + .. code-block:: python + + from langchain.llms import HuggingFaceEndpoint + endpoint_url = ( + "https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud" + ) + hf = HuggingFaceEndpoint( + endpoint_url=endpoint_url, + huggingfacehub_api_token="my-api-key" + ) + """ + + endpoint_url: str = "" + """Endpoint URL to use.""" + task: Optional[str] = None + """Task to call the model with. Should be a task that returns `generated_text`.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + + huggingfacehub_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + try: + from huggingface_hub.hf_api import HfApi + + try: + HfApi( + endpoint="https://huggingface.co", # Can be a Private Hub endpoint. + token=huggingfacehub_api_token, + ).whoami() + except Exception as e: + raise ValueError( + "Could not authenticate with huggingface_hub. " + "Please check your API token." 
+ ) from e + + except ImportError: + raise ValueError( + "Could not import huggingface_hub python package. " + "Please it install it with `pip install huggingface_hub`." + ) + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + _model_kwargs = self.model_kwargs or {} + return { + **{"endpoint_url": self.endpoint_url, "task": self.task}, + **{"model_kwargs": _model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "huggingface_endpoint" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to HuggingFace Hub's inference endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = hf("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + + # payload samples + parameter_payload = {"inputs": prompt, "parameters": _model_kwargs} + + # HTTP headers for authorization + headers = { + "Authorization": f"Bearer {self.huggingfacehub_api_token}", + "Content-Type": "application/json", + } + + # send request + try: + response = requests.post( + self.endpoint_url, headers=headers, json=parameter_payload + ) + except requests.exceptions.RequestException as e: # This is the correct syntax + raise ValueError(f"Error raised by inference endpoint: {e}") + generated_text = response.json() + if "error" in generated_text: + raise ValueError( + f"Error raised by inference API: {generated_text['error']}" + ) + if self.task == "text-generation": + # Text generation return includes the starter text. + text = generated_text[0]["generated_text"][len(prompt) :] + elif self.task == "text2text-generation": + text = generated_text[0]["generated_text"] + else: + raise ValueError( + f"Got invalid task {self.task}, " + f"currently only {VALID_TASKS} are supported" + ) + if stop is not None: + # This is a bit hacky, but I can't figure out a better way to enforce + # stop tokens when making calls to huggingface_hub. + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/huggingface_hub.py b/langchain/llms/huggingface_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..b9c4098879a9805f2a7a163c9cb2f4296746aabc --- /dev/null +++ b/langchain/llms/huggingface_hub.py @@ -0,0 +1,120 @@ +"""Wrapper around HuggingFace APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +DEFAULT_REPO_ID = "gpt2" +VALID_TASKS = ("text2text-generation", "text-generation") + + +class HuggingFaceHub(LLM, BaseModel): + """Wrapper around HuggingFaceHub models. + + To use, you should have the ``huggingface_hub`` python package installed, and the + environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Only supports `text-generation` and `text2text-generation` for now. + + Example: + .. code-block:: python + + from langchain.llms import HuggingFaceHub + hf = HuggingFaceHub(repo_id="gpt2", huggingfacehub_api_token="my-api-key") + """ + + client: Any #: :meta private: + repo_id: str = DEFAULT_REPO_ID + """Model name to use.""" + task: Optional[str] = None + """Task to call the model with. 
Should be a task that returns `generated_text`.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + + huggingfacehub_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + try: + from huggingface_hub.inference_api import InferenceApi + + repo_id = values["repo_id"] + client = InferenceApi( + repo_id=repo_id, + token=huggingfacehub_api_token, + task=values.get("task"), + ) + if client.task not in VALID_TASKS: + raise ValueError( + f"Got invalid task {client.task}, " + f"currently only {VALID_TASKS} are supported" + ) + values["client"] = client + except ImportError: + raise ValueError( + "Could not import huggingface_hub python package. " + "Please it install it with `pip install huggingface_hub`." + ) + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + _model_kwargs = self.model_kwargs or {} + return { + **{"repo_id": self.repo_id, "task": self.task}, + **{"model_kwargs": _model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "huggingface_hub" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to HuggingFace Hub's inference endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = hf("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + response = self.client(inputs=prompt, params=_model_kwargs) + if "error" in response: + raise ValueError(f"Error raised by inference API: {response['error']}") + if self.client.task == "text-generation": + # Text generation return includes the starter text. + text = response[0]["generated_text"][len(prompt) :] + elif self.client.task == "text2text-generation": + text = response[0]["generated_text"] + else: + raise ValueError( + f"Got invalid task {self.client.task}, " + f"currently only {VALID_TASKS} are supported" + ) + if stop is not None: + # This is a bit hacky, but I can't figure out a better way to enforce + # stop tokens when making calls to huggingface_hub. + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/huggingface_pipeline.py b/langchain/llms/huggingface_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..1138839cf630682ac4147fd0eacf20c0bec16014 --- /dev/null +++ b/langchain/llms/huggingface_pipeline.py @@ -0,0 +1,165 @@ +"""Wrapper around HuggingFace Pipeline APIs.""" +import importlib.util +import logging +from typing import Any, List, Mapping, Optional + +from pydantic import BaseModel, Extra + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens + +DEFAULT_MODEL_ID = "gpt2" +DEFAULT_TASK = "text-generation" +VALID_TASKS = ("text2text-generation", "text-generation") + +logger = logging.getLogger() + + +class HuggingFacePipeline(LLM, BaseModel): + """Wrapper around HuggingFace Pipeline API. + + To use, you should have the ``transformers`` python package installed. + + Only supports `text-generation` and `text2text-generation` for now. 
+ + Example using from_model_id: + .. code-block:: python + + from langchain.llms import HuggingFacePipeline + hf = HuggingFacePipeline.from_model_id( + model_id="gpt2", task="text-generation" + ) + Example passing pipeline in directly: + .. code-block:: python + + from langchain.llms import HuggingFacePipeline + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + + model_id = "gpt2" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + pipe = pipeline( + "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10 + ) + hf = HuggingFacePipeline(pipeline=pipe) + """ + + pipeline: Any #: :meta private: + model_id: str = DEFAULT_MODEL_ID + """Model name to use.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @classmethod + def from_model_id( + cls, + model_id: str, + task: str, + device: int = -1, + model_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> LLM: + """Construct the pipeline object from model_id and task.""" + try: + from transformers import ( + AutoModelForCausalLM, + AutoModelForSeq2SeqLM, + AutoTokenizer, + ) + from transformers import pipeline as hf_pipeline + + except ImportError: + raise ValueError( + "Could not import transformers python package. " + "Please it install it with `pip install transformers`." + ) + + _model_kwargs = model_kwargs or {} + tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs) + + try: + if task == "text-generation": + model = AutoModelForCausalLM.from_pretrained(model_id, **_model_kwargs) + elif task == "text2text-generation": + model = AutoModelForSeq2SeqLM.from_pretrained(model_id, **_model_kwargs) + else: + raise ValueError( + f"Got invalid task {task}, " + f"currently only {VALID_TASKS} are supported" + ) + except ImportError as e: + raise ValueError( + f"Could not load the {task} model due to missing dependencies." + ) from e + + if importlib.util.find_spec("torch") is not None: + import torch + + cuda_device_count = torch.cuda.device_count() + if device < -1 or (device >= cuda_device_count): + raise ValueError( + f"Got device=={device}, " + f"device is required to be within [-1, {cuda_device_count})" + ) + if device < 0 and cuda_device_count > 0: + logger.warning( + "Device has %d GPUs available. " + "Provide device={deviceId} to `from_model_id` to use available" + "GPUs for execution. deviceId is -1 (default) for CPU and " + "can be a positive integer associated with CUDA device id.", + cuda_device_count, + ) + + pipeline = hf_pipeline( + task=task, + model=model, + tokenizer=tokenizer, + device=device, + model_kwargs=_model_kwargs, + ) + if pipeline.task not in VALID_TASKS: + raise ValueError( + f"Got invalid task {pipeline.task}, " + f"currently only {VALID_TASKS} are supported" + ) + return cls( + pipeline=pipeline, + model_id=model_id, + model_kwargs=_model_kwargs, + **kwargs, + ) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_id": self.model_id}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + return "huggingface_pipeline" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + response = self.pipeline(prompt) + if self.pipeline.task == "text-generation": + # Text generation return includes the starter text. 
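+            # Slice the prompt off the front so only the newly generated text is returned.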
+ text = response[0]["generated_text"][len(prompt) :] + elif self.pipeline.task == "text2text-generation": + text = response[0]["generated_text"] + else: + raise ValueError( + f"Got invalid task {self.pipeline.task}, " + f"currently only {VALID_TASKS} are supported" + ) + if stop is not None: + # This is a bit hacky, but I can't figure out a better way to enforce + # stop tokens when making calls to huggingface_hub. + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/loading.py b/langchain/llms/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..723606bee63fa5bbefc214aa3929bfcb27427d5f --- /dev/null +++ b/langchain/llms/loading.py @@ -0,0 +1,42 @@ +"""Base interface for loading large language models apis.""" +import json +from pathlib import Path +from typing import Union + +import yaml + +from langchain.llms import type_to_cls_dict +from langchain.llms.base import BaseLLM + + +def load_llm_from_config(config: dict) -> BaseLLM: + """Load LLM from Config Dict.""" + if "_type" not in config: + raise ValueError("Must specify an LLM Type in config") + config_type = config.pop("_type") + + if config_type not in type_to_cls_dict: + raise ValueError(f"Loading {config_type} LLM not supported") + + llm_cls = type_to_cls_dict[config_type] + return llm_cls(**config) + + +def load_llm(file: Union[str, Path]) -> BaseLLM: + """Load LLM from file.""" + # Convert file to Path object. + if isinstance(file, str): + file_path = Path(file) + else: + file_path = file + # Load from either json or yaml. + if file_path.suffix == ".json": + with open(file_path) as f: + config = json.load(f) + elif file_path.suffix == ".yaml": + with open(file_path, "r") as f: + config = yaml.safe_load(f) + else: + raise ValueError("File type must be json or yaml") + # Load the LLM from the config now. + return load_llm_from_config(config) diff --git a/langchain/llms/manifest.py b/langchain/llms/manifest.py new file mode 100644 index 0000000000000000000000000000000000000000..b9a4ce145c4fa7701f790fd0a3156fbd0ff7a0cb --- /dev/null +++ b/langchain/llms/manifest.py @@ -0,0 +1,54 @@ +"""Wrapper around HazyResearch's Manifest library.""" +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM + + +class ManifestWrapper(LLM, BaseModel): + """Wrapper around HazyResearch's Manifest library.""" + + client: Any #: :meta private: + llm_kwargs: Optional[Dict] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that python package exists in environment.""" + try: + from manifest import Manifest + + if not isinstance(values["client"], Manifest): + raise ValueError + except ImportError: + raise ValueError( + "Could not import manifest python package. " + "Please it install it with `pip install manifest-ml`." 
+ ) + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + kwargs = self.llm_kwargs or {} + return {**self.client.client.get_model_params(), **kwargs} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "manifest" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to LLM through Manifest.""" + if stop is not None and len(stop) != 1: + raise NotImplementedError( + f"Manifest currently only supports a single stop token, got {stop}" + ) + kwargs = self.llm_kwargs or {} + if stop is not None: + kwargs["stop_token"] = stop + return self.client.run(prompt, **kwargs) diff --git a/langchain/llms/modal.py b/langchain/llms/modal.py new file mode 100644 index 0000000000000000000000000000000000000000..5037858a31e194f9ac71a1f56520bf75fc914418 --- /dev/null +++ b/langchain/llms/modal.py @@ -0,0 +1,92 @@ +"""Wrapper around Modal API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens + +logger = logging.getLogger(__name__) + + +class Modal(LLM, BaseModel): + """Wrapper around Modal large language models. + + To use, you should have the ``modal-client`` python package installed. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + from langchain.llms import Modal + modal = Modal(endpoint_url="") + + """ + + endpoint_url: str = "" + """model endpoint to use""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. 
+ Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"endpoint_url": self.endpoint_url}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "modal" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call to Modal endpoint.""" + params = self.model_kwargs or {} + response = requests.post( + url=self.endpoint_url, + headers={ + "Content-Type": "application/json", + }, + json={"prompt": prompt, **params}, + ) + try: + if prompt in response.json()["prompt"]: + response_json = response.json() + except KeyError: + raise ValueError("LangChain requires 'prompt' key in response.") + text = response_json["prompt"] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/nlpcloud.py b/langchain/llms/nlpcloud.py new file mode 100644 index 0000000000000000000000000000000000000000..2c04c41960d480d18dc29885445ac99af95ee421 --- /dev/null +++ b/langchain/llms/nlpcloud.py @@ -0,0 +1,141 @@ +"""Wrapper around NLPCloud APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env + + +class NLPCloud(LLM, BaseModel): + """Wrapper around NLPCloud large language models. + + To use, you should have the ``nlpcloud`` python package installed, and the + environment variable ``NLPCLOUD_API_KEY`` set with your API key. + + Example: + .. code-block:: python + + from langchain.llms import NLPCloud + nlpcloud = NLPCloud(model="gpt-neox-20b") + """ + + client: Any #: :meta private: + model_name: str = "finetuned-gpt-neox-20b" + """Model name to use.""" + temperature: float = 0.7 + """What sampling temperature to use.""" + min_length: int = 1 + """The minimum number of tokens to generate in the completion.""" + max_length: int = 256 + """The maximum number of tokens to generate in the completion.""" + length_no_input: bool = True + """Whether min_length and max_length should include the length of the input.""" + remove_input: bool = True + """Remove input text from API response""" + remove_end_sequence: bool = True + """Whether or not to remove the end sequence token.""" + bad_words: List[str] = [] + """List of tokens not allowed to be generated.""" + top_p: int = 1 + """Total probability mass of tokens to consider at each step.""" + top_k: int = 50 + """The number of highest probability tokens to keep for top-k filtering.""" + repetition_penalty: float = 1.0 + """Penalizes repeated tokens. 
1.0 means no penalty.""" + length_penalty: float = 1.0 + """Exponential penalty to the length.""" + do_sample: bool = True + """Whether to use sampling (True) or greedy decoding.""" + num_beams: int = 1 + """Number of beams for beam search.""" + early_stopping: bool = False + """Whether to stop beam search at num_beams sentences.""" + num_return_sequences: int = 1 + """How many completions to generate for each prompt.""" + + nlpcloud_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + nlpcloud_api_key = get_from_dict_or_env( + values, "nlpcloud_api_key", "NLPCLOUD_API_KEY" + ) + try: + import nlpcloud + + values["client"] = nlpcloud.Client( + values["model_name"], nlpcloud_api_key, gpu=True, lang="en" + ) + except ImportError: + raise ValueError( + "Could not import nlpcloud python package. " + "Please it install it with `pip install nlpcloud`." + ) + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling NLPCloud API.""" + return { + "temperature": self.temperature, + "min_length": self.min_length, + "max_length": self.max_length, + "length_no_input": self.length_no_input, + "remove_input": self.remove_input, + "remove_end_sequence": self.remove_end_sequence, + "bad_words": self.bad_words, + "top_p": self.top_p, + "top_k": self.top_k, + "repetition_penalty": self.repetition_penalty, + "length_penalty": self.length_penalty, + "do_sample": self.do_sample, + "num_beams": self.num_beams, + "early_stopping": self.early_stopping, + "num_return_sequences": self.num_return_sequences, + } + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "nlpcloud" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to NLPCloud's create endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Not supported by this interface (pass in init method) + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = nlpcloud("Tell me a joke.") + """ + if stop and len(stop) > 1: + raise ValueError( + "NLPCloud only supports a single stop sequence per generation." + "Pass in a list of length 1." 
+ ) + elif stop and len(stop) == 1: + end_sequence = stop[0] + else: + end_sequence = None + response = self.client.generation( + prompt, end_sequence=end_sequence, **self._default_params + ) + return response["generated_text"] diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py new file mode 100644 index 0000000000000000000000000000000000000000..761d19a5ac4855d507badf95318f38efb9a989ba --- /dev/null +++ b/langchain/llms/openai.py @@ -0,0 +1,738 @@ +"""Wrapper around OpenAI APIs.""" +from __future__ import annotations + +import logging +import sys +import warnings +from typing import ( + Any, + Callable, + Dict, + Generator, + List, + Mapping, + Optional, + Set, + Tuple, + Union, +) + +from pydantic import BaseModel, Extra, Field, root_validator +from tenacity import ( + before_sleep_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from langchain.llms.base import BaseLLM +from langchain.schema import Generation, LLMResult +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +def update_token_usage( + keys: Set[str], response: Dict[str, Any], token_usage: Dict[str, Any] +) -> None: + """Update token usage.""" + _keys_to_use = keys.intersection(response["usage"]) + for _key in _keys_to_use: + if _key not in token_usage: + token_usage[_key] = response["usage"][_key] + else: + token_usage[_key] += response["usage"][_key] + + +def _update_response(response: Dict[str, Any], stream_response: Dict[str, Any]) -> None: + """Update response from the stream response.""" + response["choices"][0]["text"] += stream_response["choices"][0]["text"] + response["choices"][0]["finish_reason"] = stream_response["choices"][0][ + "finish_reason" + ] + response["choices"][0]["logprobs"] = stream_response["choices"][0]["logprobs"] + + +def _streaming_response_template() -> Dict[str, Any]: + return { + "choices": [ + { + "text": "", + "finish_reason": None, + "logprobs": None, + } + ] + } + + +def _create_retry_decorator(llm: Union[BaseOpenAI, OpenAIChat]) -> Callable[[Any], Any]: + import openai + + min_seconds = 4 + max_seconds = 10 + # Wait 2^x * 1 second between each retry starting with + # 4 seconds, then up to 10 seconds, then 10 seconds afterwards + return retry( + reraise=True, + stop=stop_after_attempt(llm.max_retries), + wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds), + retry=( + retry_if_exception_type(openai.error.Timeout) + | retry_if_exception_type(openai.error.APIError) + | retry_if_exception_type(openai.error.APIConnectionError) + | retry_if_exception_type(openai.error.RateLimitError) + | retry_if_exception_type(openai.error.ServiceUnavailableError) + ), + before_sleep=before_sleep_log(logger, logging.WARNING), + ) + + +def completion_with_retry(llm: Union[BaseOpenAI, OpenAIChat], **kwargs: Any) -> Any: + """Use tenacity to retry the completion call.""" + retry_decorator = _create_retry_decorator(llm) + + @retry_decorator + def _completion_with_retry(**kwargs: Any) -> Any: + return llm.client.create(**kwargs) + + return _completion_with_retry(**kwargs) + + +async def acompletion_with_retry( + llm: Union[BaseOpenAI, OpenAIChat], **kwargs: Any +) -> Any: + """Use tenacity to retry the async completion call.""" + retry_decorator = _create_retry_decorator(llm) + + @retry_decorator + async def _completion_with_retry(**kwargs: Any) -> Any: + # Use OpenAI's async api https://github.com/openai/openai-python#async-api + return await llm.client.acreate(**kwargs) + + return await 
_completion_with_retry(**kwargs) + + +class BaseOpenAI(BaseLLM, BaseModel): + """Wrapper around OpenAI large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + + from langchain.llms import OpenAI + openai = OpenAI(model_name="text-davinci-003") + """ + + client: Any #: :meta private: + model_name: str = "text-davinci-003" + """Model name to use.""" + temperature: float = 0.7 + """What sampling temperature to use.""" + max_tokens: int = 256 + """The maximum number of tokens to generate in the completion. + -1 returns as many tokens as possible given the prompt and + the models maximal context size.""" + top_p: float = 1 + """Total probability mass of tokens to consider at each step.""" + frequency_penalty: float = 0 + """Penalizes repeated tokens according to frequency.""" + presence_penalty: float = 0 + """Penalizes repeated tokens.""" + n: int = 1 + """How many completions to generate for each prompt.""" + best_of: int = 1 + """Generates best_of completions server-side and returns the "best".""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + openai_api_key: Optional[str] = None + batch_size: int = 20 + """Batch size to use when passing multiple documents to generate.""" + request_timeout: Optional[Union[float, Tuple[float, float]]] = None + """Timeout for requests to OpenAI completion API. Default is 600 seconds.""" + logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict) + """Adjust the probability of specific tokens being generated.""" + max_retries: int = 6 + """Maximum number of retries to make when generating.""" + streaming: bool = False + """Whether to stream the results or not.""" + + def __new__(cls, **data: Any) -> Union[OpenAIChat, BaseOpenAI]: # type: ignore + """Initialize the OpenAI object.""" + model_name = data.get("model_name", "") + if model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-4"): + warnings.warn( + "You are trying to use a chat model. This way of initializing it is " + "no longer supported. Instead, please use: " + "`from langchain.chat_models import ChatOpenAI`" + ) + return OpenAIChat(**data) + return super().__new__(cls) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.ignore + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transfered to model_kwargs. 
+ Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + values["client"] = openai.Completion + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + if values["streaming"] and values["n"] > 1: + raise ValueError("Cannot stream results when n > 1.") + if values["streaming"] and values["best_of"] > 1: + raise ValueError("Cannot stream results when best_of > 1.") + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + normal_params = { + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "n": self.n, + "best_of": self.best_of, + "request_timeout": self.request_timeout, + "logit_bias": self.logit_bias, + } + return {**normal_params, **self.model_kwargs} + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Call out to OpenAI's endpoint with k unique prompts. + + Args: + prompts: The prompts to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The full LLM output. + + Example: + .. code-block:: python + + response = openai.generate(["Tell me a joke."]) + """ + # TODO: write a unit test for this + params = self._invocation_params + sub_prompts = self.get_sub_prompts(params, prompts, stop) + choices = [] + token_usage: Dict[str, int] = {} + # Get the token usage from the response. + # Includes prompt, completion, and total tokens used. + _keys = {"completion_tokens", "prompt_tokens", "total_tokens"} + for _prompts in sub_prompts: + if self.streaming: + if len(_prompts) > 1: + raise ValueError("Cannot stream results with multiple prompts.") + params["stream"] = True + response = _streaming_response_template() + for stream_resp in completion_with_retry( + self, prompt=_prompts, **params + ): + self.callback_manager.on_llm_new_token( + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], + ) + _update_response(response, stream_resp) + choices.extend(response["choices"]) + else: + response = completion_with_retry(self, prompt=_prompts, **params) + choices.extend(response["choices"]) + if not self.streaming: + # Can't update token usage if streaming + update_token_usage(_keys, response, token_usage) + return self.create_llm_result(choices, prompts, token_usage) + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Call out to OpenAI's endpoint async with k unique prompts.""" + params = self._invocation_params + sub_prompts = self.get_sub_prompts(params, prompts, stop) + choices = [] + token_usage: Dict[str, int] = {} + # Get the token usage from the response. + # Includes prompt, completion, and total tokens used. 
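+        # Token usage can only be aggregated for non-streaming responses (handled below).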
+ _keys = {"completion_tokens", "prompt_tokens", "total_tokens"} + for _prompts in sub_prompts: + if self.streaming: + if len(_prompts) > 1: + raise ValueError("Cannot stream results with multiple prompts.") + params["stream"] = True + response = _streaming_response_template() + async for stream_resp in await acompletion_with_retry( + self, prompt=_prompts, **params + ): + if self.callback_manager.is_async: + await self.callback_manager.on_llm_new_token( + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], + ) + else: + self.callback_manager.on_llm_new_token( + stream_resp["choices"][0]["text"], + verbose=self.verbose, + logprobs=stream_resp["choices"][0]["logprobs"], + ) + _update_response(response, stream_resp) + choices.extend(response["choices"]) + else: + response = await acompletion_with_retry(self, prompt=_prompts, **params) + choices.extend(response["choices"]) + if not self.streaming: + # Can't update token usage if streaming + update_token_usage(_keys, response, token_usage) + return self.create_llm_result(choices, prompts, token_usage) + + def get_sub_prompts( + self, + params: Dict[str, Any], + prompts: List[str], + stop: Optional[List[str]] = None, + ) -> List[List[str]]: + """Get the sub prompts for llm call.""" + if stop is not None: + if "stop" in params: + raise ValueError("`stop` found in both the input and default params.") + params["stop"] = stop + if params["max_tokens"] == -1: + if len(prompts) != 1: + raise ValueError( + "max_tokens set to -1 not supported for multiple inputs." + ) + params["max_tokens"] = self.max_tokens_for_prompt(prompts[0]) + sub_prompts = [ + prompts[i : i + self.batch_size] + for i in range(0, len(prompts), self.batch_size) + ] + return sub_prompts + + def create_llm_result( + self, choices: Any, prompts: List[str], token_usage: Dict[str, int] + ) -> LLMResult: + """Create the LLMResult from the choices and prompts.""" + generations = [] + for i, _ in enumerate(prompts): + sub_choices = choices[i * self.n : (i + 1) * self.n] + generations.append( + [ + Generation( + text=choice["text"], + generation_info=dict( + finish_reason=choice.get("finish_reason"), + logprobs=choice.get("logprobs"), + ), + ) + for choice in sub_choices + ] + ) + llm_output = {"token_usage": token_usage, "model_name": self.model_name} + return LLMResult(generations=generations, llm_output=llm_output) + + def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator: + """Call OpenAI with streaming flag and return the resulting generator. + + BETA: this is a beta feature while we figure out the right abstraction. + Once that happens, this interface could change. + + Args: + prompt: The prompts to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + A generator representing the stream of tokens from OpenAI. + + Example: + .. 
code-block:: python + + generator = openai.stream("Tell me a joke.") + for token in generator: + yield token + """ + params = self.prep_streaming_params(stop) + generator = self.client.create(prompt=prompt, **params) + + return generator + + def prep_streaming_params(self, stop: Optional[List[str]] = None) -> Dict[str, Any]: + """Prepare the params for streaming.""" + params = self._invocation_params + if params["best_of"] != 1: + raise ValueError("OpenAI only supports best_of == 1 for streaming") + if stop is not None: + if "stop" in params: + raise ValueError("`stop` found in both the input and default params.") + params["stop"] = stop + params["stream"] = True + return params + + @property + def _invocation_params(self) -> Dict[str, Any]: + """Get the parameters used to invoke the model.""" + return self._default_params + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "openai" + + def get_num_tokens(self, text: str) -> int: + """Calculate num tokens with tiktoken package.""" + # tiktoken NOT supported for Python 3.8 or below + if sys.version_info[1] <= 8: + return super().get_num_tokens(text) + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to calculate get_num_tokens. " + "Please it install it with `pip install tiktoken`." + ) + encoder = "gpt2" + if self.model_name in ("text-davinci-003", "text-davinci-002"): + encoder = "p50k_base" + if self.model_name.startswith("code"): + encoder = "p50k_base" + # create a GPT-3 encoder instance + enc = tiktoken.get_encoding(encoder) + + # encode the text using the GPT-3 encoder + tokenized_text = enc.encode(text) + + # calculate the number of tokens in the encoded text + return len(tokenized_text) + + def modelname_to_contextsize(self, modelname: str) -> int: + """Calculate the maximum number of tokens possible to generate for a model. + + text-davinci-003: 4,097 tokens + text-curie-001: 2,048 tokens + text-babbage-001: 2,048 tokens + text-ada-001: 2,048 tokens + code-davinci-002: 8,000 tokens + code-cushman-001: 2,048 tokens + + Args: + modelname: The modelname we want to know the context size for. + + Returns: + The maximum context size + + Example: + .. code-block:: python + + max_tokens = openai.modelname_to_contextsize("text-davinci-003") + """ + if modelname == "text-davinci-003": + return 4097 + elif modelname == "text-curie-001": + return 2048 + elif modelname == "text-babbage-001": + return 2048 + elif modelname == "text-ada-001": + return 2048 + elif modelname == "code-davinci-002": + return 8000 + elif modelname == "code-cushman-001": + return 2048 + else: + return 4097 + + def max_tokens_for_prompt(self, prompt: str) -> int: + """Calculate the maximum number of tokens possible to generate for a prompt. + + Args: + prompt: The prompt to pass into the model. + + Returns: + The maximum number of tokens to generate for a prompt. + + Example: + .. 
code-block:: python + + max_tokens = openai.max_token_for_prompt("Tell me a joke.") + """ + num_tokens = self.get_num_tokens(prompt) + + # get max context size for model by name + max_size = self.modelname_to_contextsize(self.model_name) + return max_size - num_tokens + + +class OpenAI(BaseOpenAI): + """Generic OpenAI class that uses model name.""" + + @property + def _invocation_params(self) -> Dict[str, Any]: + return {**{"model": self.model_name}, **super()._invocation_params} + + +class AzureOpenAI(BaseOpenAI): + """Azure specific OpenAI class that uses deployment name.""" + + deployment_name: str = "" + """Deployment name to use.""" + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return { + **{"deployment_name": self.deployment_name}, + **super()._identifying_params, + } + + @property + def _invocation_params(self) -> Dict[str, Any]: + return {**{"engine": self.deployment_name}, **super()._invocation_params} + + +class OpenAIChat(BaseLLM, BaseModel): + """Wrapper around OpenAI Chat large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + + from langchain.llms import OpenAIChat + openaichat = OpenAIChat(model_name="gpt-3.5-turbo") + """ + + client: Any #: :meta private: + model_name: str = "gpt-3.5-turbo" + """Model name to use.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + openai_api_key: Optional[str] = None + max_retries: int = 6 + """Maximum number of retries to make when generating.""" + prefix_messages: List = Field(default_factory=list) + """Series of messages for Chat input.""" + streaming: bool = False + """Whether to stream the results or not.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.ignore + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + try: + values["client"] = openai.ChatCompletion + except AttributeError: + raise ValueError( + "`openai` has no `ChatCompletion` attribute, this is likely " + "due to an old version of the openai package. Try upgrading it " + "with `pip install --upgrade openai`." + ) + warnings.warn( + "You are trying to use a chat model. This way of initializing it is " + "no longer supported. 
Instead, please use: " + "`from langchain.chat_models import ChatOpenAI`" + ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + return self.model_kwargs + + def _get_chat_params( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> Tuple: + if len(prompts) > 1: + raise ValueError( + f"OpenAIChat currently only supports single prompt, got {prompts}" + ) + messages = self.prefix_messages + [{"role": "user", "content": prompts[0]}] + params: Dict[str, Any] = {**{"model": self.model_name}, **self._default_params} + if stop is not None: + if "stop" in params: + raise ValueError("`stop` found in both the input and default params.") + params["stop"] = stop + if params.get("max_tokens") == -1: + # for ChatGPT api, omitting max_tokens is equivalent to having no limit + del params["max_tokens"] + return messages, params + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + messages, params = self._get_chat_params(prompts, stop) + if self.streaming: + response = "" + params["stream"] = True + for stream_resp in completion_with_retry(self, messages=messages, **params): + token = stream_resp["choices"][0]["delta"].get("content", "") + response += token + self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + return LLMResult( + generations=[[Generation(text=response)]], + ) + else: + full_response = completion_with_retry(self, messages=messages, **params) + llm_output = { + "token_usage": full_response["usage"], + "model_name": self.model_name, + } + return LLMResult( + generations=[ + [Generation(text=full_response["choices"][0]["message"]["content"])] + ], + llm_output=llm_output, + ) + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + messages, params = self._get_chat_params(prompts, stop) + if self.streaming: + response = "" + params["stream"] = True + async for stream_resp in await acompletion_with_retry( + self, messages=messages, **params + ): + token = stream_resp["choices"][0]["delta"].get("content", "") + response += token + if self.callback_manager.is_async: + await self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + else: + self.callback_manager.on_llm_new_token( + token, + verbose=self.verbose, + ) + return LLMResult( + generations=[[Generation(text=response)]], + ) + else: + full_response = await acompletion_with_retry( + self, messages=messages, **params + ) + llm_output = { + "token_usage": full_response["usage"], + "model_name": self.model_name, + } + return LLMResult( + generations=[ + [Generation(text=full_response["choices"][0]["message"]["content"])] + ], + llm_output=llm_output, + ) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "openai-chat" + + def get_num_tokens(self, text: str) -> int: + """Calculate num tokens with tiktoken package.""" + # tiktoken NOT supported for Python 3.8 or below + if sys.version_info[1] <= 8: + return super().get_num_tokens(text) + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to calculate get_num_tokens. " + "Please it install it with `pip install tiktoken`." 
+ ) + # create a GPT-3.5-Turbo encoder instance + enc = tiktoken.encoding_for_model("gpt-3.5-turbo") + + # encode the text using the GPT-3.5-Turbo encoder + tokenized_text = enc.encode(text) + + # calculate the number of tokens in the encoded text + return len(tokenized_text) diff --git a/langchain/llms/petals.py b/langchain/llms/petals.py new file mode 100644 index 0000000000000000000000000000000000000000..bffe59ba817a3e9c1d1956a26a7a321ba62e8440 --- /dev/null +++ b/langchain/llms/petals.py @@ -0,0 +1,143 @@ +"""Wrapper around Petals API.""" +import logging +from typing import Any, Dict, List, Mapping, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class Petals(LLM, BaseModel): + """Wrapper around Petals Bloom models. + + To use, you should have the ``petals`` python package installed, and the + environment variable ``HUGGINGFACE_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + from langchain.llms import petals + petals = Petals() + + """ + + client: Any + """The client to use for the API calls.""" + + tokenizer: Any + """The tokenizer to use for the API calls.""" + + model_name: str = "bigscience/bloom-petals" + """The model to use.""" + + temperature: float = 0.7 + """What sampling temperature to use""" + + max_new_tokens: int = 256 + """The maximum number of new tokens to generate in the completion.""" + + top_p: float = 0.9 + """The cumulative probability for top-p sampling.""" + + top_k: Optional[int] = None + """The number of highest probability vocabulary tokens + to keep for top-k-filtering.""" + + do_sample: bool = True + """Whether or not to use sampling; use greedy decoding otherwise.""" + + max_length: Optional[int] = None + """The maximum length of the sequence to be generated.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call + not explicitly specified.""" + + huggingface_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic config.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""WARNING! {field_name} is not default parameter. + {field_name} was transfered to model_kwargs. 
+ Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + huggingface_api_key = get_from_dict_or_env( + values, "huggingface_api_key", "HUGGINGFACE_API_KEY" + ) + try: + from petals import DistributedBloomForCausalLM + from transformers import BloomTokenizerFast + + model_name = values["model_name"] + values["tokenizer"] = BloomTokenizerFast.from_pretrained(model_name) + values["client"] = DistributedBloomForCausalLM.from_pretrained(model_name) + values["huggingface_api_key"] = huggingface_api_key + + except ImportError: + raise ValueError( + "Could not import transformers or petals python package." + "Please install with `pip install -U transformers petals`." + ) + return values + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling Petals API.""" + normal_params = { + "temperature": self.temperature, + "max_new_tokens": self.max_new_tokens, + "top_p": self.top_p, + "top_k": self.top_k, + "do_sample": self.do_sample, + "max_length": self.max_length, + } + return {**normal_params, **self.model_kwargs} + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_name": self.model_name}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "petals" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call the Petals API.""" + params = self._default_params + inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"] + outputs = self.client.generate(inputs, **params) + text = self.tokenizer.decode(outputs[0]) + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/promptlayer_openai.py b/langchain/llms/promptlayer_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..1cfb3177005b70adf493157355f3e41e7ad7abc7 --- /dev/null +++ b/langchain/llms/promptlayer_openai.py @@ -0,0 +1,204 @@ +"""PromptLayer wrapper.""" +import datetime +from typing import List, Optional + +from pydantic import BaseModel + +from langchain.llms import OpenAI, OpenAIChat +from langchain.schema import LLMResult + + +class PromptLayerOpenAI(OpenAI, BaseModel): + """Wrapper around OpenAI large language models. + + To use, you should have the ``openai`` and ``promptlayer`` python + package installed, and the environment variable ``OPENAI_API_KEY`` + and ``PROMPTLAYER_API_KEY`` set with your openAI API key and + promptlayer key respectively. + + All parameters that can be passed to the OpenAI LLM can also + be passed here. The PromptLayerOpenAI LLM adds two optional + parameters: + ``pl_tags``: List of strings to tag the request with. + ``return_pl_id``: If True, the PromptLayer request ID will be + returned in the ``generation_info`` field of the + ``Generation`` object. + + Example: + .. 
code-block:: python + + from langchain.llms import PromptLayerOpenAI + openai = PromptLayerOpenAI(model_name="text-davinci-003") + """ + + pl_tags: Optional[List[str]] + return_pl_id: Optional[bool] = False + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Call OpenAI generate and then call PromptLayer API to log the request.""" + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = super()._generate(prompts, stop) + request_end_time = datetime.datetime.now().timestamp() + for i in range(len(prompts)): + prompt = prompts[i] + generation = generated_responses.generations[i][0] + resp = { + "text": generation.text, + "llm_output": generated_responses.llm_output, + } + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerOpenAI", + "langchain", + [prompt], + self._identifying_params, + self.pl_tags, + resp, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = await super()._agenerate(prompts, stop) + request_end_time = datetime.datetime.now().timestamp() + for i in range(len(prompts)): + prompt = prompts[i] + generation = generated_responses.generations[i][0] + resp = { + "text": generation.text, + "llm_output": generated_responses.llm_output, + } + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerOpenAI.async", + "langchain", + [prompt], + self._identifying_params, + self.pl_tags, + resp, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses + + +class PromptLayerOpenAIChat(OpenAIChat, BaseModel): + """Wrapper around OpenAI large language models. + + To use, you should have the ``openai`` and ``promptlayer`` python + package installed, and the environment variable ``OPENAI_API_KEY`` + and ``PROMPTLAYER_API_KEY`` set with your openAI API key and + promptlayer key respectively. + + All parameters that can be passed to the OpenAIChat LLM can also + be passed here. The PromptLayerOpenAIChat adds two optional + parameters: + ``pl_tags``: List of strings to tag the request with. + ``return_pl_id``: If True, the PromptLayer request ID will be + returned in the ``generation_info`` field of the + ``Generation`` object. + + Example: + .. 
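# Usage sketch for PromptLayerOpenAI as defined above: pl_tags labels the request
# in PromptLayer and return_pl_id=True exposes the request id on generation_info.
# Tag names and the prompt are illustrative; assumes OPENAI_API_KEY and
# PROMPTLAYER_API_KEY are set.
from langchain.llms import PromptLayerOpenAI

pl_llm = PromptLayerOpenAI(
    model_name="text-davinci-003",
    pl_tags=["langchain", "demo"],
    return_pl_id=True,
)
result = pl_llm.generate(["Name three marketing KPIs."])
generation = result.generations[0][0]
print(generation.text)
print(generation.generation_info["pl_request_id"])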
code-block:: python + + from langchain.llms import PromptLayerOpenAIChat + openaichat = PromptLayerOpenAIChat(model_name="gpt-3.5-turbo") + """ + + pl_tags: Optional[List[str]] + return_pl_id: Optional[bool] = False + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + """Call OpenAI generate and then call PromptLayer API to log the request.""" + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = super()._generate(prompts, stop) + request_end_time = datetime.datetime.now().timestamp() + for i in range(len(prompts)): + prompt = prompts[i] + generation = generated_responses.generations[i][0] + resp = { + "text": generation.text, + "llm_output": generated_responses.llm_output, + } + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerOpenAIChat", + "langchain", + [prompt], + self._identifying_params, + self.pl_tags, + resp, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + from promptlayer.utils import get_api_key, promptlayer_api_request + + request_start_time = datetime.datetime.now().timestamp() + generated_responses = await super()._agenerate(prompts, stop) + request_end_time = datetime.datetime.now().timestamp() + for i in range(len(prompts)): + prompt = prompts[i] + generation = generated_responses.generations[i][0] + resp = { + "text": generation.text, + "llm_output": generated_responses.llm_output, + } + pl_request_id = promptlayer_api_request( + "langchain.PromptLayerOpenAIChat.async", + "langchain", + [prompt], + self._identifying_params, + self.pl_tags, + resp, + request_start_time, + request_end_time, + get_api_key(), + return_pl_id=self.return_pl_id, + ) + if self.return_pl_id: + if generation.generation_info is None or not isinstance( + generation.generation_info, dict + ): + generation.generation_info = {} + generation.generation_info["pl_request_id"] = pl_request_id + return generated_responses diff --git a/langchain/llms/sagemaker_endpoint.py b/langchain/llms/sagemaker_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..926e17184b48469276d83bbc936d8c840654d080 --- /dev/null +++ b/langchain/llms/sagemaker_endpoint.py @@ -0,0 +1,237 @@ +"""Wrapper around Sagemaker InvokeEndpoint API.""" +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Mapping, Optional, Union + +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens + + +class ContentHandlerBase(ABC): + """A handler class to transform input from LLM to a + format that SageMaker endpoint expects. Similarily, + the class also handles transforming output from the + SageMaker endpoint to a format that LLM class expects. + """ + + """ + Example: + .. 
code-block:: python + + class ContentHandler(ContentHandlerBase): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: + input_str = json.dumps({prompt: prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json[0]["generated_text"] + """ + + content_type: Optional[str] = "text/plain" + """The MIME type of the input data passed to endpoint""" + + accepts: Optional[str] = "text/plain" + """The MIME type of the response data returned from endpoint""" + + @abstractmethod + def transform_input( + self, prompt: Union[str, List[str]], model_kwargs: Dict + ) -> bytes: + """Transforms the input to a format that model can accept + as the request Body. Should return bytes or seekable file + like object in the format specified in the content_type + request header. + """ + + @abstractmethod + def transform_output(self, output: bytes) -> Any: + """Transforms the output from the model to string that + the LLM class expects. + """ + + +class SagemakerEndpoint(LLM, BaseModel): + """Wrapper around custom Sagemaker Inference Endpoints. + + To use, you must supply the endpoint name from your deployed + Sagemaker model & the region where it is deployed. + + To authenticate, the AWS client uses the following methods to + automatically load credentials: + https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + + If a specific credential profile should be used, you must pass + the name of the profile from the ~/.aws/credentials file that is to be used. + + Make sure the credentials / roles used have the required policies to + access the Sagemaker endpoint. + See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html + """ + + """ + Example: + .. code-block:: python + + from langchain import SagemakerEndpoint + endpoint_name = ( + "my-endpoint-name" + ) + region_name = ( + "us-west-2" + ) + credentials_profile_name = ( + "default" + ) + se = SagemakerEndpoint( + endpoint_name=endpoint_name, + region_name=region_name, + credentials_profile_name=credentials_profile_name + ) + """ + client: Any #: :meta private: + + endpoint_name: str = "" + """The name of the endpoint from the deployed Sagemaker model. + Must be unique within an AWS Region.""" + + region_name: str = "" + """The aws region where the Sagemaker model is deployed, eg. `us-west-2`.""" + + credentials_profile_name: Optional[str] = None + """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which + has either access keys or role information specified. + If not specified, the default credential profile or, if on an EC2 instance, + credentials from IMDS will be used. + See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + """ + + content_handler: ContentHandlerBase + """The content handler class that provides an input and + output transform functions to handle formats between LLM + and the endpoint. + """ + + """ + Example: + .. 
code-block:: python + + class ContentHandler(ContentHandlerBase): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: + input_str = json.dumps({prompt: prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json[0]["generated_text"] + """ + + model_kwargs: Optional[Dict] = None + """Key word arguments to pass to the model.""" + + endpoint_kwargs: Optional[Dict] = None + """Optional attributes passed to the invoke_endpoint + function. See `boto3`_. docs for more info. + .. _boto3: + """ + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that AWS credentials to and python package exists in environment.""" + try: + import boto3 + + try: + if values["credentials_profile_name"] is not None: + session = boto3.Session( + profile_name=values["credentials_profile_name"] + ) + else: + # use default credentials + session = boto3.Session() + + values["client"] = session.client( + "sagemaker-runtime", region_name=values["region_name"] + ) + + except Exception as e: + raise ValueError( + "Could not load credentials to authenticate with AWS client. " + "Please check that credentials in the specified " + "profile name are valid." + ) from e + + except ImportError: + raise ValueError( + "Could not import boto3 python package. " + "Please it install it with `pip install boto3`." + ) + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + _model_kwargs = self.model_kwargs or {} + return { + **{"endpoint_name": self.endpoint_name}, + **{"model_kwargs": _model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "sagemaker_endpoint" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Sagemaker inference endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = se("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + _endpoint_kwargs = self.endpoint_kwargs or {} + + body = self.content_handler.transform_input(prompt, _model_kwargs) + content_type = self.content_handler.content_type + accepts = self.content_handler.accepts + + # send request + try: + response = self.client.invoke_endpoint( + EndpointName=self.endpoint_name, + Body=body, + ContentType=content_type, + Accept=accepts, + **_endpoint_kwargs, + ) + except Exception as e: + raise ValueError(f"Error raised by inference endpoint: {e}") + + text = self.content_handler.transform_output(response["Body"]) + if stop is not None: + # This is a bit hacky, but I can't figure out a better way to enforce + # stop tokens when making calls to the sagemaker endpoint. 
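# End-to-end sketch for SagemakerEndpoint: a JSON content handler (mirroring the
# docstring example above) plus an invocation. The endpoint name, region and
# response schema are hypothetical and depend on the deployed model container.
import json
from typing import Dict

from langchain.llms.sagemaker_endpoint import ContentHandlerBase, SagemakerEndpoint


class JsonContentHandler(ContentHandlerBase):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        return json.dumps({"text_inputs": prompt, **model_kwargs}).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]


se_llm = SagemakerEndpoint(
    endpoint_name="my-endpoint-name",  # hypothetical endpoint
    region_name="us-west-2",
    credentials_profile_name="default",
    content_handler=JsonContentHandler(),
    model_kwargs={"max_new_tokens": 50},
)
print(se_llm("Tell me a joke."))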
+ text = enforce_stop_tokens(text, stop) + + return text diff --git a/langchain/llms/self_hosted.py b/langchain/llms/self_hosted.py new file mode 100644 index 0000000000000000000000000000000000000000..3054329f017bdb3e76e8b131f3932704748c1280 --- /dev/null +++ b/langchain/llms/self_hosted.py @@ -0,0 +1,212 @@ +"""Run model inference on self-hosted remote hardware.""" +import importlib.util +import logging +import pickle +from typing import Any, Callable, List, Mapping, Optional + +from pydantic import BaseModel, Extra + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens + +logger = logging.getLogger() + + +def _generate_text( + pipeline: Any, + prompt: str, + *args: Any, + stop: Optional[List[str]] = None, + **kwargs: Any, +) -> str: + """Inference function to send to the remote hardware. + + Accepts a pipeline callable (or, more likely, + a key pointing to the model on the cluster's object store) + and returns text predictions for each document + in the batch. + """ + text = pipeline(prompt, *args, **kwargs) + if stop is not None: + text = enforce_stop_tokens(text, stop) + return text + + +def _send_pipeline_to_device(pipeline: Any, device: int) -> Any: + """Send a pipeline to a device on the cluster.""" + if isinstance(pipeline, str): + with open(pipeline, "rb") as f: + pipeline = pickle.load(f) + + if importlib.util.find_spec("torch") is not None: + import torch + + cuda_device_count = torch.cuda.device_count() + if device < -1 or (device >= cuda_device_count): + raise ValueError( + f"Got device=={device}, " + f"device is required to be within [-1, {cuda_device_count})" + ) + if device < 0 and cuda_device_count > 0: + logger.warning( + "Device has %d GPUs available. " + "Provide device={deviceId} to `from_model_id` to use available" + "GPUs for execution. deviceId is -1 for CPU and " + "can be a positive integer associated with CUDA device id.", + cuda_device_count, + ) + + pipeline.device = torch.device(device) + pipeline.model = pipeline.model.to(pipeline.device) + return pipeline + + +class SelfHostedPipeline(LLM, BaseModel): + """Run model inference on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another + cloud like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example for custom pipeline and inference functions: + .. code-block:: python + + from langchain.llms import SelfHostedPipeline + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + import runhouse as rh + + def load_pipeline(): + tokenizer = AutoTokenizer.from_pretrained("gpt2") + model = AutoModelForCausalLM.from_pretrained("gpt2") + return pipeline( + "text-generation", model=model, tokenizer=tokenizer, + max_new_tokens=10 + ) + def inference_fn(pipeline, prompt, stop = None): + return pipeline(prompt)[0]["generated_text"] + + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + llm = SelfHostedPipeline( + model_load_fn=load_pipeline, + hardware=gpu, + model_reqs=model_reqs, inference_fn=inference_fn + ) + Example for <2GB model (can be serialized and sent directly to the server): + .. code-block:: python + + from langchain.llms import SelfHostedPipeline + import runhouse as rh + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + my_model = ... 
+ llm = SelfHostedPipeline.from_pipeline( + pipeline=my_model, + hardware=gpu, + model_reqs=["./", "torch", "transformers"], + ) + Example passing model path for larger models: + .. code-block:: python + + from langchain.llms import SelfHostedPipeline + import runhouse as rh + import pickle + from transformers import pipeline + + generator = pipeline(model="gpt2") + rh.blob(pickle.dumps(generator), path="models/pipeline.pkl" + ).save().to(gpu, path="models") + llm = SelfHostedPipeline.from_pipeline( + pipeline="models/pipeline.pkl", + hardware=gpu, + model_reqs=["./", "torch", "transformers"], + ) + """ + + pipeline_ref: Any #: :meta private: + client: Any #: :meta private: + inference_fn: Callable = _generate_text #: :meta private: + """Inference function to send to the remote hardware.""" + hardware: Any + """Remote hardware to send the inference function to.""" + model_load_fn: Callable + """Function to load the model remotely on the server.""" + load_fn_kwargs: Optional[dict] = None + """Key word arguments to pass to the model load function.""" + model_reqs: List[str] = ["./", "torch"] + """Requirements to install on hardware to inference the model.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def __init__(self, **kwargs: Any): + """Init the pipeline with an auxiliary function. + + The load function must be in global scope to be imported + and run on the server, i.e. in a module and not a REPL or closure. + Then, initialize the remote inference function. + """ + super().__init__(**kwargs) + try: + import runhouse as rh + + except ImportError: + raise ValueError( + "Could not import runhouse python package. " + "Please install it with `pip install runhouse`." + ) + + remote_load_fn = rh.function(fn=self.model_load_fn).to( + self.hardware, reqs=self.model_reqs + ) + _load_fn_kwargs = self.load_fn_kwargs or {} + self.pipeline_ref = remote_load_fn.remote(**_load_fn_kwargs) + + self.client = rh.function(fn=self.inference_fn).to( + self.hardware, reqs=self.model_reqs + ) + + @classmethod + def from_pipeline( + cls, + pipeline: Any, + hardware: Any, + model_reqs: Optional[List[str]] = None, + device: int = 0, + **kwargs: Any, + ) -> LLM: + """Init the SelfHostedPipeline from a pipeline object or string.""" + if not isinstance(pipeline, str): + logger.warning( + "Serializing pipeline to send to remote hardware. " + "Note, it can be quite slow" + "to serialize and send large models with each execution. " + "Consider sending the pipeline" + "to the cluster and passing the path to the pipeline instead." 
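# Follow-on sketch: once constructed, the self-hosted pipeline behaves like any
# other LLM and can be composed into a chain. "llm" refers to the SelfHostedPipeline
# built in the examples above; the prompt wording is illustrative.
from langchain import LLMChain, PromptTemplate

prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run("colorful socks"))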
+ ) + + load_fn_kwargs = {"pipeline": pipeline, "device": device} + return cls( + load_fn_kwargs=load_fn_kwargs, + model_load_fn=_send_pipeline_to_device, + hardware=hardware, + model_reqs=["transformers", "torch"] + (model_reqs or []), + **kwargs, + ) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"hardware": self.hardware}, + } + + @property + def _llm_type(self) -> str: + return "self_hosted_llm" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + return self.client(pipeline=self.pipeline_ref, prompt=prompt, stop=stop) diff --git a/langchain/llms/self_hosted_hugging_face.py b/langchain/llms/self_hosted_hugging_face.py new file mode 100644 index 0000000000000000000000000000000000000000..9415b6ca5cb044f6912ad72516d0104ed3aa244c --- /dev/null +++ b/langchain/llms/self_hosted_hugging_face.py @@ -0,0 +1,202 @@ +"""Wrapper around HuggingFace Pipeline API to run on self-hosted remote hardware.""" +import importlib.util +import logging +from typing import Any, Callable, List, Mapping, Optional + +from pydantic import BaseModel, Extra + +from langchain.llms.self_hosted import SelfHostedPipeline +from langchain.llms.utils import enforce_stop_tokens + +DEFAULT_MODEL_ID = "gpt2" +DEFAULT_TASK = "text-generation" +VALID_TASKS = ("text2text-generation", "text-generation") + +logger = logging.getLogger() + + +def _generate_text( + pipeline: Any, + prompt: str, + *args: Any, + stop: Optional[List[str]] = None, + **kwargs: Any, +) -> str: + """Inference function to send to the remote hardware. + + Accepts a Hugging Face pipeline (or more likely, + a key pointing to such a pipeline on the cluster's object store) + and returns generated text. + """ + response = pipeline(prompt, *args, **kwargs) + if pipeline.task == "text-generation": + # Text generation return includes the starter text. + text = response[0]["generated_text"][len(prompt) :] + elif pipeline.task == "text2text-generation": + text = response[0]["generated_text"] + else: + raise ValueError( + f"Got invalid task {pipeline.task}, " + f"currently only {VALID_TASKS} are supported" + ) + if stop is not None: + text = enforce_stop_tokens(text, stop) + return text + + +def _load_transformer( + model_id: str = DEFAULT_MODEL_ID, + task: str = DEFAULT_TASK, + device: int = 0, + model_kwargs: Optional[dict] = None, +) -> Any: + """Inference function to send to the remote hardware. + + Accepts a huggingface model_id and returns a pipeline for the task. + """ + from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer + from transformers import pipeline as hf_pipeline + + _model_kwargs = model_kwargs or {} + tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs) + + try: + if task == "text-generation": + model = AutoModelForCausalLM.from_pretrained(model_id, **_model_kwargs) + elif task == "text2text-generation": + model = AutoModelForSeq2SeqLM.from_pretrained(model_id, **_model_kwargs) + else: + raise ValueError( + f"Got invalid task {task}, " + f"currently only {VALID_TASKS} are supported" + ) + except ImportError as e: + raise ValueError( + f"Could not load the {task} model due to missing dependencies." 
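# Behavior sketch for _generate_text above: "text-generation" pipelines echo the
# prompt, so the prompt prefix is sliced off before stop tokens are enforced.
# The fake pipeline output below is illustrative.
prompt = "Q: What is 2 + 2?\nA:"
response = [{"generated_text": prompt + " 4\nQ: What is 3 + 3?"}]
text = response[0]["generated_text"][len(prompt):]
print(text)  # ' 4\nQ: What is 3 + 3?' (a stop list like ["\nQ:"] would trim the tail)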
+ ) from e + + if importlib.util.find_spec("torch") is not None: + import torch + + cuda_device_count = torch.cuda.device_count() + if device < -1 or (device >= cuda_device_count): + raise ValueError( + f"Got device=={device}, " + f"device is required to be within [-1, {cuda_device_count})" + ) + if device < 0 and cuda_device_count > 0: + logger.warning( + "Device has %d GPUs available. " + "Provide device={deviceId} to `from_model_id` to use available" + "GPUs for execution. deviceId is -1 for CPU and " + "can be a positive integer associated with CUDA device id.", + cuda_device_count, + ) + + pipeline = hf_pipeline( + task=task, + model=model, + tokenizer=tokenizer, + device=device, + model_kwargs=_model_kwargs, + ) + if pipeline.task not in VALID_TASKS: + raise ValueError( + f"Got invalid task {pipeline.task}, " + f"currently only {VALID_TASKS} are supported" + ) + return pipeline + + +class SelfHostedHuggingFaceLLM(SelfHostedPipeline, BaseModel): + """Wrapper around HuggingFace Pipeline API to run on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another cloud + like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Only supports `text-generation` and `text2text-generation` for now. + + Example using from_model_id: + .. code-block:: python + + from langchain.llms import SelfHostedHuggingFaceLLM + import runhouse as rh + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + hf = SelfHostedHuggingFaceLLM( + model_id="google/flan-t5-large", task="text2text-generation", + hardware=gpu + ) + Example passing fn that generates a pipeline (bc the pipeline is not serializable): + .. code-block:: python + + from langchain.llms import SelfHostedHuggingFaceLLM + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + import runhouse as rh + + def get_pipeline(): + model_id = "gpt2" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + pipe = pipeline( + "text-generation", model=model, tokenizer=tokenizer + ) + return pipe + hf = SelfHostedHuggingFaceLLM( + model_load_fn=get_pipeline, model_id="gpt2", hardware=gpu) + """ + + model_id: str = DEFAULT_MODEL_ID + """Hugging Face model_id to load the model.""" + task: str = DEFAULT_TASK + """Hugging Face task (either "text-generation" or "text2text-generation").""" + device: int = 0 + """Device to use for inference. -1 for CPU, 0 for GPU, 1 for second GPU, etc.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + hardware: Any + """Remote hardware to send the inference function to.""" + model_reqs: List[str] = ["./", "transformers", "torch"] + """Requirements to install on hardware to inference the model.""" + model_load_fn: Callable = _load_transformer + """Function to load the model remotely on the server.""" + inference_fn: Callable = _generate_text #: :meta private: + """Inference function to send to the remote hardware.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def __init__(self, **kwargs: Any): + """Construct the pipeline remotely using an auxiliary function. + + The load function needs to be importable to be imported + and run on the server, i.e. in a module and not a REPL or closure. + Then, initialize the remote inference function. 
+ """ + load_fn_kwargs = { + "model_id": kwargs.get("model_id", DEFAULT_MODEL_ID), + "task": kwargs.get("task", DEFAULT_TASK), + "device": kwargs.get("device", 0), + "model_kwargs": kwargs.get("model_kwargs", None), + } + super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"model_id": self.model_id}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + return "selfhosted_huggingface_pipeline" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + return self.client(pipeline=self.pipeline_ref, prompt=prompt, stop=stop) diff --git a/langchain/llms/stochasticai.py b/langchain/llms/stochasticai.py new file mode 100644 index 0000000000000000000000000000000000000000..21c32b216740202701a03ae439edba193f76f974 --- /dev/null +++ b/langchain/llms/stochasticai.py @@ -0,0 +1,130 @@ +"""Wrapper around StochasticAI APIs.""" +import logging +import time +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +logger = logging.getLogger(__name__) + + +class StochasticAI(LLM, BaseModel): + """Wrapper around StochasticAI large language models. + + To use, you should have the environment variable ``STOCHASTICAI_API_KEY`` + set with your API key. + + Example: + .. code-block:: python + + from langchain.llms import StochasticAI + stochasticai = StochasticAI(api_url="") + """ + + api_url: str = "" + """Model name to use.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not + explicitly specified.""" + + stochasticai_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = {field.alias for field in cls.__fields__.values()} + + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name not in all_required_field_names: + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + logger.warning( + f"""{field_name} was transfered to model_kwargs. + Please confirm that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + values["model_kwargs"] = extra + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + stochasticai_api_key = get_from_dict_or_env( + values, "stochasticai_api_key", "STOCHASTICAI_API_KEY" + ) + values["stochasticai_api_key"] = stochasticai_api_key + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + **{"endpoint_url": self.api_url}, + **{"model_kwargs": self.model_kwargs}, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "stochasticai" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to StochasticAI's complete endpoint. + + Args: + prompt: The prompt to pass into the model. 
+ stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + response = StochasticAI("Tell me a joke.") + """ + params = self.model_kwargs or {} + response_post = requests.post( + url=self.api_url, + json={"prompt": prompt, "params": params}, + headers={ + "apiKey": f"{self.stochasticai_api_key}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + ) + response_post.raise_for_status() + response_post_json = response_post.json() + completed = False + while not completed: + response_get = requests.get( + url=response_post_json["data"]["responseUrl"], + headers={ + "apiKey": f"{self.stochasticai_api_key}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + ) + response_get.raise_for_status() + response_get_json = response_get.json()["data"] + text = response_get_json.get("completion") + completed = text is not None + time.sleep(0.5) + text = text[0] + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/llms/utils.py b/langchain/llms/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a42fd130ee6897602d1c4ba105a0c10ed010daec --- /dev/null +++ b/langchain/llms/utils.py @@ -0,0 +1,8 @@ +"""Common utility functions for working with LLM APIs.""" +import re +from typing import List + + +def enforce_stop_tokens(text: str, stop: List[str]) -> str: + """Cut off the text as soon as any stop words occur.""" + return re.split("|".join(stop), text)[0] diff --git a/langchain/llms/writer.py b/langchain/llms/writer.py new file mode 100644 index 0000000000000000000000000000000000000000..7959bac6f33f50be92c630bb6f44d3bcf73cd55a --- /dev/null +++ b/langchain/llms/writer.py @@ -0,0 +1,155 @@ +"""Wrapper around Writer APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + + +class Writer(LLM, BaseModel): + """Wrapper around Writer large language models. + + To use, you should have the environment variable ``WRITER_API_KEY`` + set with your API key. + + Example: + .. code-block:: python + + from langchain import Writer + writer = Writer(model_id="palmyra-base") + """ + + model_id: str = "palmyra-base" + """Model name to use.""" + + tokens_to_generate: int = 24 + """Max number of tokens to generate.""" + + logprobs: bool = False + """Whether to return log probabilities.""" + + temperature: float = 1.0 + """What sampling temperature to use.""" + + length: int = 256 + """The maximum number of tokens to generate in the completion.""" + + top_p: float = 1.0 + """Total probability mass of tokens to consider at each step.""" + + top_k: int = 1 + """The number of highest probability vocabulary tokens to + keep for top-k-filtering.""" + + repetition_penalty: float = 1.0 + """Penalizes repeated tokens according to frequency.""" + + random_seed: int = 0 + """The model generates random results. + Changing the random seed alone will produce a different response + with similar characteristics. It is possible to reproduce results + by fixing the random seed (assuming all other hyperparameters + are also fixed)""" + + beam_search_diversity_rate: float = 1.0 + """Only applies to beam search, i.e. 
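# Behavior sketch for enforce_stop_tokens (langchain/llms/utils.py above): the stop
# strings are joined into one regex and the text is cut at the first match, so stop
# sequences containing regex metacharacters would need re.escape before use.
from langchain.llms.utils import enforce_stop_tokens

text = "Observation: 42\nThought: done\nFinal Answer: 42"
print(enforce_stop_tokens(text, ["\nThought:", "\nObservation:"]))
# -> 'Observation: 42'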
when the beam width is >1. + A higher value encourages beam search to return a more diverse + set of candidates""" + + beam_width: Optional[int] = None + """The number of concurrent candidates to keep track of during + beam search""" + + length_pentaly: float = 1.0 + """Only applies to beam search, i.e. when the beam width is >1. + Larger values penalize long candidates more heavily, thus preferring + shorter candidates""" + + writer_api_key: Optional[str] = None + + stop: Optional[List[str]] = None + """Sequences when completion generation will stop""" + + base_url: Optional[str] = None + """Base url to use, if None decides based on model name.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + writer_api_key = get_from_dict_or_env( + values, "writer_api_key", "WRITER_API_KEY" + ) + values["writer_api_key"] = writer_api_key + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Writer API.""" + return { + "tokens_to_generate": self.tokens_to_generate, + "stop": self.stop, + "logprobs": self.logprobs, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "repetition_penalty": self.repetition_penalty, + "random_seed": self.random_seed, + "beam_search_diversity_rate": self.beam_search_diversity_rate, + "beam_width": self.beam_width, + "length_pentaly": self.length_pentaly, + } + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{"model_id": self.model_id}, **self._default_params} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "writer" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Call out to Writer's complete endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
code-block:: python + + response = Writer("Tell me a joke.") + """ + if self.base_url is not None: + base_url = self.base_url + else: + base_url = ( + "https://api.llm.writer.com/v1/models/{self.model_id}/completions" + ) + response = requests.post( + url=base_url, + headers={ + "Authorization": f"Bearer {self.writer_api_key}", + "Content-Type": "application/json", + "Accept": "application/json", + }, + json={"prompt": prompt, **self._default_params}, + ) + text = response.text + if stop is not None: + # I believe this is required since the stop tokens + # are not enforced by the model parameters + text = enforce_stop_tokens(text, stop) + return text diff --git a/langchain/memory/__init__.py b/langchain/memory/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..65fe5d628de2704a2345c134ac495d90a3166b69 --- /dev/null +++ b/langchain/memory/__init__.py @@ -0,0 +1,29 @@ +from langchain.memory.buffer import ( + ConversationBufferMemory, + ConversationStringBufferMemory, +) +from langchain.memory.buffer_window import ConversationBufferWindowMemory +from langchain.memory.chat_memory import ChatMessageHistory +from langchain.memory.combined import CombinedMemory +from langchain.memory.entity import ConversationEntityMemory +from langchain.memory.kg import ConversationKGMemory +from langchain.memory.readonly import ReadOnlySharedMemory +from langchain.memory.simple import SimpleMemory +from langchain.memory.summary import ConversationSummaryMemory +from langchain.memory.summary_buffer import ConversationSummaryBufferMemory +from langchain.memory.token_buffer import ConversationTokenBufferMemory + +__all__ = [ + "CombinedMemory", + "ConversationBufferWindowMemory", + "ConversationBufferMemory", + "SimpleMemory", + "ConversationSummaryBufferMemory", + "ConversationKGMemory", + "ConversationEntityMemory", + "ConversationSummaryMemory", + "ChatMessageHistory", + "ConversationStringBufferMemory", + "ReadOnlySharedMemory", + "ConversationTokenBufferMemory", +] diff --git a/langchain/memory/__pycache__/__init__.cpython-39.pyc b/langchain/memory/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc6f366467b21b8f9f612470772aa109a226a5c5 Binary files /dev/null and b/langchain/memory/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/buffer.cpython-39.pyc b/langchain/memory/__pycache__/buffer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..861641e642254820afa6002a27397b1005e7a1e8 Binary files /dev/null and b/langchain/memory/__pycache__/buffer.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/buffer_window.cpython-39.pyc b/langchain/memory/__pycache__/buffer_window.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b7184ba2579754a9a757a0a92c980ac6dcccb70 Binary files /dev/null and b/langchain/memory/__pycache__/buffer_window.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/chat_memory.cpython-39.pyc b/langchain/memory/__pycache__/chat_memory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfcd617d65d8bba25d3396b5874eb53617aac68c Binary files /dev/null and b/langchain/memory/__pycache__/chat_memory.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/combined.cpython-39.pyc b/langchain/memory/__pycache__/combined.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62f1fb2dfa34ee90f8a06fa83edf3bb39fc6195b Binary files 
/dev/null and b/langchain/memory/__pycache__/combined.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/entity.cpython-39.pyc b/langchain/memory/__pycache__/entity.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fd50bbcdd22f6e139eb7b40bf8589430bb837e2 Binary files /dev/null and b/langchain/memory/__pycache__/entity.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/kg.cpython-39.pyc b/langchain/memory/__pycache__/kg.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b99d69e85ceba27d82ce32a3d17ab74b8b42b68d Binary files /dev/null and b/langchain/memory/__pycache__/kg.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/prompt.cpython-39.pyc b/langchain/memory/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..761d8da117109d36c616aeb80db5c6f2820637d0 Binary files /dev/null and b/langchain/memory/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/readonly.cpython-39.pyc b/langchain/memory/__pycache__/readonly.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb51b91172f50d1206e30a1b79085237b9fe079f Binary files /dev/null and b/langchain/memory/__pycache__/readonly.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/simple.cpython-39.pyc b/langchain/memory/__pycache__/simple.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ad191e4ea9372bb1523895820ef36b69fa6b972 Binary files /dev/null and b/langchain/memory/__pycache__/simple.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/summary.cpython-39.pyc b/langchain/memory/__pycache__/summary.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cc26220fac726e0c52b0c70ba279a4150761c4a Binary files /dev/null and b/langchain/memory/__pycache__/summary.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/summary_buffer.cpython-39.pyc b/langchain/memory/__pycache__/summary_buffer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f88049a3d27770a87337eec25a3373e46ce6763 Binary files /dev/null and b/langchain/memory/__pycache__/summary_buffer.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/token_buffer.cpython-39.pyc b/langchain/memory/__pycache__/token_buffer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c33b1dda42cb74f332bf0c670196977870dbcb1f Binary files /dev/null and b/langchain/memory/__pycache__/token_buffer.cpython-39.pyc differ diff --git a/langchain/memory/__pycache__/utils.cpython-39.pyc b/langchain/memory/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc354a15e778627ca132be8e0fe14b831cb9d2b6 Binary files /dev/null and b/langchain/memory/__pycache__/utils.cpython-39.pyc differ diff --git a/langchain/memory/buffer.py b/langchain/memory/buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..0e197f84e68ae26fa82d711ac6a0940a05cfbd14 --- /dev/null +++ b/langchain/memory/buffer.py @@ -0,0 +1,91 @@ +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, root_validator + +from langchain.memory.chat_memory import BaseChatMemory, BaseMemory +from langchain.memory.utils import get_prompt_input_key +from langchain.schema import get_buffer_string + + +class ConversationBufferMemory(BaseChatMemory, BaseModel): + """Buffer for storing conversation 
memory.""" + + human_prefix: str = "Human" + ai_prefix: str = "AI" + memory_key: str = "history" #: :meta private: + + @property + def buffer(self) -> Any: + """String buffer of memory.""" + if self.return_messages: + return self.chat_memory.messages + else: + return get_buffer_string( + self.chat_memory.messages, + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + return {self.memory_key: self.buffer} + + +class ConversationStringBufferMemory(BaseMemory, BaseModel): + """Buffer for storing conversation memory.""" + + human_prefix: str = "Human" + ai_prefix: str = "AI" + """Prefix to use for AI generated responses.""" + buffer: str = "" + output_key: Optional[str] = None + input_key: Optional[str] = None + memory_key: str = "history" #: :meta private: + + @root_validator() + def validate_chains(cls, values: Dict) -> Dict: + """Validate that return messages is not True.""" + if values.get("return_messages", False): + raise ValueError( + "return_messages must be False for ConversationStringBufferMemory" + ) + return values + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """Return history buffer.""" + return {self.memory_key: self.buffer} + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + if self.input_key is None: + prompt_input_key = get_prompt_input_key(inputs, self.memory_variables) + else: + prompt_input_key = self.input_key + if self.output_key is None: + if len(outputs) != 1: + raise ValueError(f"One output key expected, got {outputs.keys()}") + output_key = list(outputs.keys())[0] + else: + output_key = self.output_key + human = f"{self.human_prefix}: " + inputs[prompt_input_key] + ai = f"{self.ai_prefix}: " + outputs[output_key] + self.buffer += "\n" + "\n".join([human, ai]) + + def clear(self) -> None: + """Clear memory contents.""" + self.buffer = "" diff --git a/langchain/memory/buffer_window.py b/langchain/memory/buffer_window.py new file mode 100644 index 0000000000000000000000000000000000000000..d76faaddcbfcb5614e1b6050c656a1988abd2baf --- /dev/null +++ b/langchain/memory/buffer_window.py @@ -0,0 +1,41 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel + +from langchain.memory.chat_memory import BaseChatMemory +from langchain.schema import BaseMessage, get_buffer_string + + +class ConversationBufferWindowMemory(BaseChatMemory, BaseModel): + """Buffer for storing conversation memory.""" + + human_prefix: str = "Human" + ai_prefix: str = "AI" + memory_key: str = "history" #: :meta private: + k: int = 5 + + @property + def buffer(self) -> List[BaseMessage]: + """String buffer of memory.""" + return self.chat_memory.messages + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. 
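# Usage sketch for the buffer memories defined above: save_context appends a
# human/AI exchange and load_memory_variables returns it under the configured
# memory_key. The exchange text is illustrative.
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

memory = ConversationBufferMemory()
memory.save_context({"input": "Hi there"}, {"output": "Hello! How can I help?"})
print(memory.load_memory_variables({}))
# {'history': 'Human: Hi there\nAI: Hello! How can I help?'}

# The windowed variant keeps only the last k exchanges (k * 2 messages).
window = ConversationBufferWindowMemory(k=1)
window.save_context({"input": "first"}, {"output": "one"})
window.save_context({"input": "second"}, {"output": "two"})
print(window.load_memory_variables({})["history"])
# 'Human: second\nAI: two'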
+ + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """Return history buffer.""" + + if self.return_messages: + buffer: Any = self.buffer[-self.k * 2 :] + else: + buffer = get_buffer_string( + self.buffer[-self.k * 2 :], + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + return {self.memory_key: buffer} diff --git a/langchain/memory/chat_memory.py b/langchain/memory/chat_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..0dc8010383b2862f4ccb152bd03f737e4f020697 --- /dev/null +++ b/langchain/memory/chat_memory.py @@ -0,0 +1,46 @@ +from abc import ABC +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + +from langchain.memory.utils import get_prompt_input_key +from langchain.schema import AIMessage, BaseMemory, BaseMessage, HumanMessage + + +class ChatMessageHistory(BaseModel): + messages: List[BaseMessage] = Field(default_factory=list) + + def add_user_message(self, message: str) -> None: + self.messages.append(HumanMessage(content=message)) + + def add_ai_message(self, message: str) -> None: + self.messages.append(AIMessage(content=message)) + + def clear(self) -> None: + self.messages = [] + + +class BaseChatMemory(BaseMemory, ABC): + chat_memory: ChatMessageHistory = Field(default_factory=ChatMessageHistory) + output_key: Optional[str] = None + input_key: Optional[str] = None + return_messages: bool = False + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + if self.input_key is None: + prompt_input_key = get_prompt_input_key(inputs, self.memory_variables) + else: + prompt_input_key = self.input_key + if self.output_key is None: + if len(outputs) != 1: + raise ValueError(f"One output key expected, got {outputs.keys()}") + output_key = list(outputs.keys())[0] + else: + output_key = self.output_key + self.chat_memory.add_user_message(inputs[prompt_input_key]) + self.chat_memory.add_ai_message(outputs[output_key]) + + def clear(self) -> None: + """Clear memory contents.""" + self.chat_memory.clear() diff --git a/langchain/memory/combined.py b/langchain/memory/combined.py new file mode 100644 index 0000000000000000000000000000000000000000..eaee9c36383ea046fd00da499de1e781ab5faf9d --- /dev/null +++ b/langchain/memory/combined.py @@ -0,0 +1,49 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel + +from langchain.schema import BaseMemory + + +class CombinedMemory(BaseMemory, BaseModel): + """Class for combining multiple memories' data together.""" + + memories: List[BaseMemory] + """For tracking all the memories that should be accessed.""" + + @property + def memory_variables(self) -> List[str]: + """All the memory variables that this instance provides.""" + """Collected from the all the linked memories.""" + + memory_variables = [] + + for memory in self.memories: + memory_variables.extend(memory.memory_variables) + + return memory_variables + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """Load all vars from sub-memories.""" + memory_data: Dict[str, Any] = {} + + # Collect vars from all sub-memories + for memory in self.memories: + data = memory.load_memory_variables(inputs) + memory_data = { + **memory_data, + **data, + } + + return memory_data + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this session for every memory.""" + # 
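# Sketch of the underlying chat history object: BaseChatMemory.save_context is a
# thin wrapper over these two calls, and return_messages=True exposes the raw
# message objects instead of a formatted string. Message text is illustrative.
from langchain.memory import ChatMessageHistory, ConversationBufferMemory

history = ChatMessageHistory()
history.add_user_message("hi!")
history.add_ai_message("whats up?")
print(history.messages)  # [HumanMessage(...), AIMessage(...)]

msg_memory = ConversationBufferMemory(return_messages=True)
msg_memory.save_context({"input": "hi!"}, {"output": "whats up?"})
print(msg_memory.load_memory_variables({})["history"])  # same message objects as above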
Save context for all sub-memories + for memory in self.memories: + memory.save_context(inputs, outputs) + + def clear(self) -> None: + """Clear context from this session for every memory.""" + for memory in self.memories: + memory.clear() diff --git a/langchain/memory/entity.py b/langchain/memory/entity.py new file mode 100644 index 0000000000000000000000000000000000000000..73f0bc15d562c174de6d5136d63ab45fc604f7f1 --- /dev/null +++ b/langchain/memory/entity.py @@ -0,0 +1,102 @@ +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel + +from langchain.chains.llm import LLMChain +from langchain.memory.chat_memory import BaseChatMemory +from langchain.memory.prompt import ( + ENTITY_EXTRACTION_PROMPT, + ENTITY_SUMMARIZATION_PROMPT, +) +from langchain.memory.utils import get_prompt_input_key +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel, BaseMessage, get_buffer_string + + +class ConversationEntityMemory(BaseChatMemory, BaseModel): + """Entity extractor & summarizer to memory.""" + + human_prefix: str = "Human" + ai_prefix: str = "AI" + llm: BaseLanguageModel + entity_extraction_prompt: BasePromptTemplate = ENTITY_EXTRACTION_PROMPT + entity_summarization_prompt: BasePromptTemplate = ENTITY_SUMMARIZATION_PROMPT + store: Dict[str, Optional[str]] = {} + entity_cache: List[str] = [] + k: int = 3 + chat_history_key: str = "history" + + @property + def buffer(self) -> List[BaseMessage]: + return self.chat_memory.messages + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + + :meta private: + """ + return ["entities", self.chat_history_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + chain = LLMChain(llm=self.llm, prompt=self.entity_extraction_prompt) + if self.input_key is None: + prompt_input_key = get_prompt_input_key(inputs, self.memory_variables) + else: + prompt_input_key = self.input_key + buffer_string = get_buffer_string( + self.buffer[-self.k * 2 :], + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + output = chain.predict( + history=buffer_string, + input=inputs[prompt_input_key], + ) + if output.strip() == "NONE": + entities = [] + else: + entities = [w.strip() for w in output.split(",")] + entity_summaries = {} + for entity in entities: + entity_summaries[entity] = self.store.get(entity, "") + self.entity_cache = entities + if self.return_messages: + buffer: Any = self.buffer[-self.k * 2 :] + else: + buffer = buffer_string + return { + self.chat_history_key: buffer, + "entities": entity_summaries, + } + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + super().save_context(inputs, outputs) + if self.input_key is None: + prompt_input_key = get_prompt_input_key(inputs, self.memory_variables) + else: + prompt_input_key = self.input_key + for entity in self.entity_cache: + chain = LLMChain(llm=self.llm, prompt=self.entity_summarization_prompt) + # key value store for entity + existing_summary = self.store.get(entity, "") + buffer_string = get_buffer_string( + self.buffer[-self.k * 2 :], + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + + output = chain.predict( + summary=existing_summary, + history=buffer_string, + input=inputs[prompt_input_key], + entity=entity, + ) + self.store[entity] = output.strip() + + def clear(self) -> None: + """Clear memory contents.""" + 
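# Usage sketch for ConversationEntityMemory: the LLM extracts entities from the
# latest line and keeps a per-entity summary in ``store``. The OpenAI model and
# the conversation text are illustrative; any BaseLanguageModel works.
from langchain.llms import OpenAI
from langchain.memory import ConversationEntityMemory

entity_memory = ConversationEntityMemory(llm=OpenAI(temperature=0))
inputs = {"input": "Deven and Sam are working on a hackathon project."}
print(entity_memory.load_memory_variables(inputs)["entities"])
entity_memory.save_context(inputs, {"output": "Sounds like a fun project!"})
print(entity_memory.store)  # per-entity summaries, e.g. keys 'Deven' and 'Sam'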
self.chat_memory.clear() + self.store = {} diff --git a/langchain/memory/kg.py b/langchain/memory/kg.py new file mode 100644 index 0000000000000000000000000000000000000000..b0aaa4c0eda3d81f2aa483df922d6488d1e3e0de --- /dev/null +++ b/langchain/memory/kg.py @@ -0,0 +1,137 @@ +from typing import Any, Dict, List, Type, Union + +from pydantic import BaseModel, Field + +from langchain.chains.llm import LLMChain +from langchain.graphs import NetworkxEntityGraph +from langchain.graphs.networkx_graph import KnowledgeTriple, get_entities, parse_triples +from langchain.memory.chat_memory import BaseChatMemory +from langchain.memory.prompt import ( + ENTITY_EXTRACTION_PROMPT, + KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT, +) +from langchain.memory.utils import get_prompt_input_key +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import ( + BaseLanguageModel, + BaseMessage, + SystemMessage, + get_buffer_string, +) + + +class ConversationKGMemory(BaseChatMemory, BaseModel): + """Knowledge graph memory for storing conversation memory. + + Integrates with external knowledge graph to store and retrieve + information about knowledge triples in the conversation. + """ + + k: int = 2 + human_prefix: str = "Human" + ai_prefix: str = "AI" + kg: NetworkxEntityGraph = Field(default_factory=NetworkxEntityGraph) + knowledge_extraction_prompt: BasePromptTemplate = KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT + entity_extraction_prompt: BasePromptTemplate = ENTITY_EXTRACTION_PROMPT + llm: BaseLanguageModel + summary_message_cls: Type[BaseMessage] = SystemMessage + """Number of previous utterances to include in the context.""" + memory_key: str = "history" #: :meta private: + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + entities = self._get_current_entities(inputs) + summaries = {} + for entity in entities: + knowledge = self.kg.get_entity_knowledge(entity) + if knowledge: + summaries[entity] = ". ".join(knowledge) + "." + context: Union[str, List] + if summaries: + summary_strings = [ + f"On {entity}: {summary}" for entity, summary in summaries.items() + ] + if self.return_messages: + context = [ + self.summary_message_cls(content=text) for text in summary_strings + ] + else: + context = "\n".join(summary_strings) + else: + if self.return_messages: + context = [] + else: + context = "" + return {self.memory_key: context} + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. 
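# Usage sketch for ConversationKGMemory: each exchange is mined for knowledge
# triples that are added to the entity graph, and load_memory_variables surfaces
# what the graph knows about entities mentioned in the new input. Model choice,
# wording, and the exact summary phrasing are illustrative.
from langchain.llms import OpenAI
from langchain.memory import ConversationKGMemory

kg_memory = ConversationKGMemory(llm=OpenAI(temperature=0))
kg_memory.save_context(
    {"input": "Sam's favorite color is blue."}, {"output": "Good to know!"}
)
print(kg_memory.load_memory_variables({"input": "What do you know about Sam?"}))
# e.g. {'history': 'On Sam: Sam favorite color is blue.'} (phrasing depends on the LLM)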
+ + :meta private: + """ + return [self.memory_key] + + def _get_prompt_input_key(self, inputs: Dict[str, Any]) -> str: + """Get the input key for the prompt.""" + if self.input_key is None: + return get_prompt_input_key(inputs, self.memory_variables) + return self.input_key + + def _get_prompt_output_key(self, outputs: Dict[str, Any]) -> str: + """Get the output key for the prompt.""" + if self.output_key is None: + if len(outputs) != 1: + raise ValueError(f"One output key expected, got {outputs.keys()}") + return list(outputs.keys())[0] + return self.output_key + + def get_current_entities(self, input_string: str) -> List[str]: + chain = LLMChain(llm=self.llm, prompt=self.entity_extraction_prompt) + buffer_string = get_buffer_string( + self.chat_memory.messages[-self.k * 2 :], + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + output = chain.predict( + history=buffer_string, + input=input_string, + ) + return get_entities(output) + + def _get_current_entities(self, inputs: Dict[str, Any]) -> List[str]: + """Get the current entities in the conversation.""" + prompt_input_key = self._get_prompt_input_key(inputs) + return self.get_current_entities(inputs[prompt_input_key]) + + def get_knowledge_triplets(self, input_string: str) -> List[KnowledgeTriple]: + chain = LLMChain(llm=self.llm, prompt=self.knowledge_extraction_prompt) + buffer_string = get_buffer_string( + self.chat_memory.messages[-self.k * 2 :], + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + output = chain.predict( + history=buffer_string, + input=input_string, + verbose=True, + ) + knowledge = parse_triples(output) + return knowledge + + def _get_and_update_kg(self, inputs: Dict[str, Any]) -> None: + """Get and update knowledge graph from the conversation history.""" + prompt_input_key = self._get_prompt_input_key(inputs) + knowledge = self.get_knowledge_triplets(inputs[prompt_input_key]) + for triple in knowledge: + self.kg.add_triple(triple) + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + super().save_context(inputs, outputs) + self._get_and_update_kg(inputs) + + def clear(self) -> None: + """Clear memory contents.""" + super().clear() + self.kg.clear() diff --git a/langchain/memory/prompt.py b/langchain/memory/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..6f7338ff81d34a5887b5818ae38c03c442463472 --- /dev/null +++ b/langchain/memory/prompt.py @@ -0,0 +1,165 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_DEFAULT_ENTITY_MEMORY_CONVERSATION_TEMPLATE = """You are an assistant to a human, powered by a large language model trained by OpenAI. + +You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. + +You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. 
Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics. + +Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist. + +Context: +{entities} + +Current conversation: +{history} +Last line: +Human: {input} +You:""" + +ENTITY_MEMORY_CONVERSATION_TEMPLATE = PromptTemplate( + input_variables=["entities", "history", "input"], + template=_DEFAULT_ENTITY_MEMORY_CONVERSATION_TEMPLATE, +) + +_DEFAULT_SUMMARIZER_TEMPLATE = """Progressively summarize the lines of conversation provided, adding onto the previous summary returning a new summary. + +EXAMPLE +Current summary: +The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good. + +New lines of conversation: +Human: Why do you think artificial intelligence is a force for good? +AI: Because artificial intelligence will help humans reach their full potential. + +New summary: +The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good because it will help humans reach their full potential. +END OF EXAMPLE + +Current summary: +{summary} + +New lines of conversation: +{new_lines} + +New summary:""" +SUMMARY_PROMPT = PromptTemplate( + input_variables=["summary", "new_lines"], template=_DEFAULT_SUMMARIZER_TEMPLATE +) + +_DEFAULT_ENTITY_EXTRACTION_TEMPLATE = """You are an AI assistant reading the transcript of a conversation between an AI and a human. Extract all of the proper nouns from the last line of conversation. As a guideline, a proper noun is generally capitalized. You should definitely extract all names and places. + +The conversation history is provided just in case of a coreference (e.g. "What do you know about him" where "him" is defined in a previous line) -- ignore items mentioned there that are not in the last line. + +Return the output as a single comma-separated list, or NONE if there is nothing of note to return (e.g. the user is just issuing a greeting or having a simple conversation). + +EXAMPLE +Conversation history: +Person #1: how's it going today? +AI: "It's going great! How about you?" +Person #1: good! busy working on Langchain. lots to do. +AI: "That sounds like a lot of work! What kind of things are you doing to make Langchain better?" +Last line: +Person #1: i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. +Output: Langchain +END OF EXAMPLE + +EXAMPLE +Conversation history: +Person #1: how's it going today? +AI: "It's going great! How about you?" +Person #1: good! busy working on Langchain. lots to do. +AI: "That sounds like a lot of work! What kind of things are you doing to make Langchain better?" +Last line: +Person #1: i'm trying to improve Langchain's interfaces, the UX, its integrations with various products the user might want ... a lot of stuff. I'm working with Person #2. 
+Output: Langchain, Person #2 +END OF EXAMPLE + +Conversation history (for reference only): +{history} +Last line of conversation (for extraction): +Human: {input} + +Output:""" +ENTITY_EXTRACTION_PROMPT = PromptTemplate( + input_variables=["history", "input"], template=_DEFAULT_ENTITY_EXTRACTION_TEMPLATE +) + +_DEFAULT_ENTITY_SUMMARIZATION_TEMPLATE = """You are an AI assistant helping a human keep track of facts about relevant people, places, and concepts in their life. Update the summary of the provided entity in the "Entity" section based on the last line of your conversation with the human. If you are writing the summary for the first time, return a single sentence. +The update should only include facts that are relayed in the last line of conversation about the provided entity, and should only contain facts about the provided entity. + +If there is no new information about the provided entity or the information is not worth noting (not an important or relevant fact to remember long-term), return the existing summary unchanged. + +Full conversation history (for context): +{history} + +Entity to summarize: +{entity} + +Existing summary of {entity}: +{summary} + +Last line of conversation: +Human: {input} +Updated summary:""" + +ENTITY_SUMMARIZATION_PROMPT = PromptTemplate( + input_variables=["entity", "summary", "history", "input"], + template=_DEFAULT_ENTITY_SUMMARIZATION_TEMPLATE, +) + + +KG_TRIPLE_DELIMITER = "<|>" +_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE = ( + "You are a networked intelligence helping a human track knowledge triples" + " about all relevant people, things, concepts, etc. and integrating" + " them with your knowledge stored within your weights" + " as well as that stored in a knowledge graph." + " Extract all of the knowledge triples from the last line of conversation." + " A knowledge triple is a clause that contains a subject, a predicate," + " and an object. The subject is the entity being described," + " the predicate is the property of the subject that is being" + " described, and the object is the value of the property.\n\n" + "EXAMPLE\n" + "Conversation history:\n" + "Person #1: Did you hear aliens landed in Area 51?\n" + "AI: No, I didn't hear that. What do you know about Area 51?\n" + "Person #1: It's a secret military base in Nevada.\n" + "AI: What do you know about Nevada?\n" + "Last line of conversation:\n" + "Person #1: It's a state in the US. It's also the number 1 producer of gold in the US.\n\n" + f"Output: (Nevada, is a, state){KG_TRIPLE_DELIMITER}(Nevada, is in, US)" + f"{KG_TRIPLE_DELIMITER}(Nevada, is the number 1 producer of, gold)\n" + "END OF EXAMPLE\n\n" + "EXAMPLE\n" + "Conversation history:\n" + "Person #1: Hello.\n" + "AI: Hi! How are you?\n" + "Person #1: I'm good. How are you?\n" + "AI: I'm good too.\n" + "Last line of conversation:\n" + "Person #1: I'm going to the store.\n\n" + "Output: NONE\n" + "END OF EXAMPLE\n\n" + "EXAMPLE\n" + "Conversation history:\n" + "Person #1: What do you know about Descartes?\n" + "AI: Descartes was a French philosopher, mathematician, and scientist who lived in the 17th century.\n" + "Person #1: The Descartes I'm referring to is a standup comedian and interior designer from Montreal.\n" + "AI: Oh yes, He is a comedian and an interior designer. He has been in the industry for 30 years. His favorite food is baked bean pie.\n" + "Person #1: Oh huh. 
I know Descartes likes to drive antique scooters and play the mandolin.\n" + "Last line of conversation:\n" + f"Output: (Descartes, likes to drive, antique scooters){KG_TRIPLE_DELIMITER}(Descartes, plays, mandolin)\n" + "END OF EXAMPLE\n\n" + "Conversation history (for reference only):\n" + "{history}" + "\nLast line of conversation (for extraction):\n" + "Human: {input}\n\n" + "Output:" +) + +KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT = PromptTemplate( + input_variables=["history", "input"], + template=_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE, +) diff --git a/langchain/memory/readonly.py b/langchain/memory/readonly.py new file mode 100644 index 0000000000000000000000000000000000000000..78a6769b0a31c499161b87e1a6b18c5b2c9da75a --- /dev/null +++ b/langchain/memory/readonly.py @@ -0,0 +1,26 @@ +from typing import Any, Dict, List + +from langchain.schema import BaseMemory + + +class ReadOnlySharedMemory(BaseMemory): + """A memory wrapper that is read-only and cannot be changed.""" + + memory: BaseMemory + + @property + def memory_variables(self) -> List[str]: + """Return memory variables.""" + return self.memory.memory_variables + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]: + """Load memory variables from memory.""" + return self.memory.load_memory_variables(inputs) + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Nothing should be saved or changed""" + pass + + def clear(self) -> None: + """Nothing to clear, got a memory like a vault.""" + pass diff --git a/langchain/memory/simple.py b/langchain/memory/simple.py new file mode 100644 index 0000000000000000000000000000000000000000..c5be80ea02eabb5d9d7f5ba925283624cdfadd39 --- /dev/null +++ b/langchain/memory/simple.py @@ -0,0 +1,28 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel + +from langchain.schema import BaseMemory + + +class SimpleMemory(BaseMemory, BaseModel): + """Simple memory for storing context or other bits of information that shouldn't + ever change between prompts. 
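+ The stored mapping is returned as-is by load_memory_variables; save_context and clear are deliberate no-ops.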
+ """ + + memories: Dict[str, Any] = dict() + + @property + def memory_variables(self) -> List[str]: + return list(self.memories.keys()) + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]: + return self.memories + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Nothing should be saved or changed, my memory is set in stone.""" + pass + + def clear(self) -> None: + """Nothing to clear, got a memory like a vault.""" + pass diff --git a/langchain/memory/summary.py b/langchain/memory/summary.py new file mode 100644 index 0000000000000000000000000000000000000000..4f9b27e1663844425743a581986cd188f9a1dbb3 --- /dev/null +++ b/langchain/memory/summary.py @@ -0,0 +1,81 @@ +from typing import Any, Dict, List, Type + +from pydantic import BaseModel, root_validator + +from langchain.chains.llm import LLMChain +from langchain.memory.chat_memory import BaseChatMemory +from langchain.memory.prompt import SUMMARY_PROMPT +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import ( + BaseLanguageModel, + BaseMessage, + SystemMessage, + get_buffer_string, +) + + +class SummarizerMixin(BaseModel): + human_prefix: str = "Human" + ai_prefix: str = "AI" + llm: BaseLanguageModel + prompt: BasePromptTemplate = SUMMARY_PROMPT + summary_message_cls: Type[BaseMessage] = SystemMessage + + def predict_new_summary( + self, messages: List[BaseMessage], existing_summary: str + ) -> str: + new_lines = get_buffer_string( + messages, + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + + chain = LLMChain(llm=self.llm, prompt=self.prompt) + return chain.predict(summary=existing_summary, new_lines=new_lines) + + +class ConversationSummaryMemory(BaseChatMemory, SummarizerMixin, BaseModel): + """Conversation summarizer to memory.""" + + buffer: str = "" + memory_key: str = "history" #: :meta private: + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + if self.return_messages: + buffer: Any = [self.summary_message_cls(content=self.buffer)] + else: + buffer = self.buffer + return {self.memory_key: buffer} + + @root_validator() + def validate_prompt_input_variables(cls, values: Dict) -> Dict: + """Validate that prompt input variables are consistent.""" + prompt_variables = values["prompt"].input_variables + expected_keys = {"summary", "new_lines"} + if expected_keys != set(prompt_variables): + raise ValueError( + "Got unexpected prompt input variables. The prompt expects " + f"{prompt_variables}, but it should have {expected_keys}." 
+ ) + return values + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + super().save_context(inputs, outputs) + self.buffer = self.predict_new_summary( + self.chat_memory.messages[-2:], self.buffer + ) + + def clear(self) -> None: + """Clear memory contents.""" + super().clear() + self.buffer = "" diff --git a/langchain/memory/summary_buffer.py b/langchain/memory/summary_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..0e5b4734ee1decf801fd813c5e7011a920d7d8d7 --- /dev/null +++ b/langchain/memory/summary_buffer.py @@ -0,0 +1,75 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel, root_validator + +from langchain.memory.chat_memory import BaseChatMemory +from langchain.memory.summary import SummarizerMixin +from langchain.schema import BaseMessage, get_buffer_string + + +class ConversationSummaryBufferMemory(BaseChatMemory, SummarizerMixin, BaseModel): + """Buffer with summarizer for storing conversation memory.""" + + max_token_limit: int = 2000 + moving_summary_buffer: str = "" + memory_key: str = "history" + + @property + def buffer(self) -> List[BaseMessage]: + return self.chat_memory.messages + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + buffer = self.buffer + if self.moving_summary_buffer != "": + first_messages: List[BaseMessage] = [ + self.summary_message_cls(content=self.moving_summary_buffer) + ] + buffer = first_messages + buffer + if self.return_messages: + final_buffer: Any = buffer + else: + final_buffer = get_buffer_string( + buffer, human_prefix=self.human_prefix, ai_prefix=self.ai_prefix + ) + return {self.memory_key: final_buffer} + + @root_validator() + def validate_prompt_input_variables(cls, values: Dict) -> Dict: + """Validate that prompt input variables are consistent.""" + prompt_variables = values["prompt"].input_variables + expected_keys = {"summary", "new_lines"} + if expected_keys != set(prompt_variables): + raise ValueError( + "Got unexpected prompt input variables. The prompt expects " + f"{prompt_variables}, but it should have {expected_keys}." 
+ ) + return values + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer.""" + super().save_context(inputs, outputs) + # Prune buffer if it exceeds max token limit + buffer = self.chat_memory.messages + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) + if curr_buffer_length > self.max_token_limit: + pruned_memory = [] + while curr_buffer_length > self.max_token_limit: + pruned_memory.append(buffer.pop(0)) + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) + self.moving_summary_buffer = self.predict_new_summary( + pruned_memory, self.moving_summary_buffer + ) + + def clear(self) -> None: + """Clear memory contents.""" + super().clear() + self.moving_summary_buffer = "" diff --git a/langchain/memory/token_buffer.py b/langchain/memory/token_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..3bd9b68410e470a2ec868770eb9c77267ae8f0f3 --- /dev/null +++ b/langchain/memory/token_buffer.py @@ -0,0 +1,54 @@ +from typing import Any, Dict, List + +from pydantic import BaseModel + +from langchain.memory.chat_memory import BaseChatMemory +from langchain.schema import BaseLanguageModel, BaseMessage, get_buffer_string + + +class ConversationTokenBufferMemory(BaseChatMemory, BaseModel): + """Buffer for storing conversation memory.""" + + human_prefix: str = "Human" + ai_prefix: str = "AI" + llm: BaseLanguageModel + memory_key: str = "history" + max_token_limit: int = 2000 + + @property + def buffer(self) -> List[BaseMessage]: + """String buffer of memory.""" + return self.chat_memory.messages + + @property + def memory_variables(self) -> List[str]: + """Will always return list of memory variables. + + :meta private: + """ + return [self.memory_key] + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history buffer.""" + buffer: Any = self.buffer + if self.return_messages: + final_buffer: Any = buffer + else: + final_buffer = get_buffer_string( + buffer, + human_prefix=self.human_prefix, + ai_prefix=self.ai_prefix, + ) + return {self.memory_key: final_buffer} + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer. Pruned.""" + super().save_context(inputs, outputs) + # Prune buffer if it exceeds max token limit + buffer = self.chat_memory.messages + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) + if curr_buffer_length > self.max_token_limit: + pruned_memory = [] + while curr_buffer_length > self.max_token_limit: + pruned_memory.append(buffer.pop(0)) + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) diff --git a/langchain/memory/utils.py b/langchain/memory/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ecff262416713d04ae6061a4b708602c44565590 --- /dev/null +++ b/langchain/memory/utils.py @@ -0,0 +1,12 @@ +from typing import Any, Dict, List + +from langchain.schema import get_buffer_string # noqa: 401 + + +def get_prompt_input_key(inputs: Dict[str, Any], memory_variables: List[str]) -> str: + # "stop" is a special key that can be passed as input but is not used to + # format the prompt. 
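+ # Illustrative example (assumed values, not from the library's tests): with
+ # memory_variables=["history"] and inputs={"input": "hi", "history": "...", "stop": ["\n"]},
+ # removing the memory variables and "stop" leaves only "input", which is returned.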
+ prompt_input_keys = list(set(inputs).difference(memory_variables + ["stop"])) + if len(prompt_input_keys) != 1: + raise ValueError(f"One input key expected got {prompt_input_keys}") + return prompt_input_keys[0] diff --git a/langchain/model_laboratory.py b/langchain/model_laboratory.py new file mode 100644 index 0000000000000000000000000000000000000000..0ba871b9bd56ad0d4ed94700723319764895bb95 --- /dev/null +++ b/langchain/model_laboratory.py @@ -0,0 +1,82 @@ +"""Experiment with different models.""" +from __future__ import annotations + +from typing import List, Optional, Sequence + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.input import get_color_mapping, print_text +from langchain.llms.base import BaseLLM +from langchain.prompts.prompt import PromptTemplate + + +class ModelLaboratory: + """Experiment with different models.""" + + def __init__(self, chains: Sequence[Chain], names: Optional[List[str]] = None): + """Initialize with chains to experiment with. + + Args: + chains: list of chains to experiment with. + """ + for chain in chains: + if not isinstance(chain, Chain): + raise ValueError( + "ModelLaboratory should now be initialized with Chains. " + "If you want to initialize with LLMs, use the `from_llms` method " + "instead (`ModelLaboratory.from_llms(...)`)" + ) + if len(chain.input_keys) != 1: + raise ValueError( + "Currently only support chains with one input variable, " + f"got {chain.input_keys}" + ) + if len(chain.output_keys) != 1: + raise ValueError( + "Currently only support chains with one output variable, " + f"got {chain.output_keys}" + ) + if names is not None: + if len(names) != len(chains): + raise ValueError("Length of chains does not match length of names.") + self.chains = chains + chain_range = [str(i) for i in range(len(self.chains))] + self.chain_colors = get_color_mapping(chain_range) + self.names = names + + @classmethod + def from_llms( + cls, llms: List[BaseLLM], prompt: Optional[PromptTemplate] = None + ) -> ModelLaboratory: + """Initialize with LLMs to experiment with and optional prompt. + + Args: + llms: list of LLMs to experiment with + prompt: Optional prompt to use to prompt the LLMs. Defaults to None. + If a prompt was provided, it should only have one input variable. + """ + if prompt is None: + prompt = PromptTemplate(input_variables=["_input"], template="{_input}") + chains = [LLMChain(llm=llm, prompt=prompt) for llm in llms] + names = [str(llm) for llm in llms] + return cls(chains, names=names) + + def compare(self, text: str) -> None: + """Compare model outputs on an input text. + + If a prompt was provided with starting the laboratory, then this text will be + fed into the prompt. If no prompt was provided, then the input text is the + entire prompt. + + Args: + text: input text to run all models on. 
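+ Each chain is labeled with its given name (or its string representation) and its output is printed in the color assigned to that chain.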
+ """ + print(f"\033[1mInput:\033[0m\n{text}\n") + for i, chain in enumerate(self.chains): + if self.names is not None: + name = self.names[i] + else: + name = str(chain) + print_text(name, end="\n") + output = chain.run(text) + print_text(output, color=self.chain_colors[str(i)], end="\n\n") diff --git a/langchain/output_parsers/__init__.py b/langchain/output_parsers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4133e4a199b67f0ae1664b2c7cac006b6cd5b714 --- /dev/null +++ b/langchain/output_parsers/__init__.py @@ -0,0 +1,25 @@ +from langchain.output_parsers.fix import OutputFixingParser +from langchain.output_parsers.list import ( + CommaSeparatedListOutputParser, + ListOutputParser, +) +from langchain.output_parsers.pydantic import PydanticOutputParser +from langchain.output_parsers.rail_parser import GuardrailsOutputParser +from langchain.output_parsers.regex import RegexParser +from langchain.output_parsers.regex_dict import RegexDictParser +from langchain.output_parsers.retry import RetryOutputParser, RetryWithErrorOutputParser +from langchain.output_parsers.structured import ResponseSchema, StructuredOutputParser + +__all__ = [ + "RegexParser", + "RegexDictParser", + "ListOutputParser", + "CommaSeparatedListOutputParser", + "StructuredOutputParser", + "ResponseSchema", + "GuardrailsOutputParser", + "PydanticOutputParser", + "RetryOutputParser", + "RetryWithErrorOutputParser", + "OutputFixingParser", +] diff --git a/langchain/output_parsers/__pycache__/__init__.cpython-39.pyc b/langchain/output_parsers/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..062e3aafdcf75e3c7ce9b41bb31a99a4f4151980 Binary files /dev/null and b/langchain/output_parsers/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/base.cpython-39.pyc b/langchain/output_parsers/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7411144c9e86ac9deb6eaf9c1cb8f04b63e9887f Binary files /dev/null and b/langchain/output_parsers/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/fix.cpython-39.pyc b/langchain/output_parsers/__pycache__/fix.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..68bcdbae5753f1614131bf042391d9a3c9d4480d Binary files /dev/null and b/langchain/output_parsers/__pycache__/fix.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/format_instructions.cpython-39.pyc b/langchain/output_parsers/__pycache__/format_instructions.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..660e8980e48072ca130de27263ebcaa9e1d96672 Binary files /dev/null and b/langchain/output_parsers/__pycache__/format_instructions.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/list.cpython-39.pyc b/langchain/output_parsers/__pycache__/list.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbc482a5b8c87c756ccdba3205bf0bb8e4bfb749 Binary files /dev/null and b/langchain/output_parsers/__pycache__/list.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/prompts.cpython-39.pyc b/langchain/output_parsers/__pycache__/prompts.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f28f7dfcff9f4fcb5b2443ea6bd71278a36b5298 Binary files /dev/null and b/langchain/output_parsers/__pycache__/prompts.cpython-39.pyc differ diff --git 
a/langchain/output_parsers/__pycache__/pydantic.cpython-39.pyc b/langchain/output_parsers/__pycache__/pydantic.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1118ac920211c20ed3d45be0607c62df298ec83a Binary files /dev/null and b/langchain/output_parsers/__pycache__/pydantic.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/rail_parser.cpython-39.pyc b/langchain/output_parsers/__pycache__/rail_parser.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cd86e03065e1cf07fa792caf77331b675cdde91 Binary files /dev/null and b/langchain/output_parsers/__pycache__/rail_parser.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/regex.cpython-39.pyc b/langchain/output_parsers/__pycache__/regex.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d60b8ca3b5aed8b6293d01a8780f9656d0da612 Binary files /dev/null and b/langchain/output_parsers/__pycache__/regex.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/regex_dict.cpython-39.pyc b/langchain/output_parsers/__pycache__/regex_dict.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52b089650ce53d752d0214958564c082910e2b4a Binary files /dev/null and b/langchain/output_parsers/__pycache__/regex_dict.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/retry.cpython-39.pyc b/langchain/output_parsers/__pycache__/retry.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..169ce4df44d0b34f3c4ab89585645d437d980fb8 Binary files /dev/null and b/langchain/output_parsers/__pycache__/retry.cpython-39.pyc differ diff --git a/langchain/output_parsers/__pycache__/structured.cpython-39.pyc b/langchain/output_parsers/__pycache__/structured.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4cc25ca1321a681990a070d0b2982087332c2b5 Binary files /dev/null and b/langchain/output_parsers/__pycache__/structured.cpython-39.pyc differ diff --git a/langchain/output_parsers/fix.py b/langchain/output_parsers/fix.py new file mode 100644 index 0000000000000000000000000000000000000000..2654948ad20fd32739781f9eb1167a7f5b7a3e88 --- /dev/null +++ b/langchain/output_parsers/fix.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import Any + +from langchain.chains.llm import LLMChain +from langchain.output_parsers.prompts import NAIVE_FIX_PROMPT +from langchain.prompts.base import BasePromptTemplate +from langchain.schema import BaseLanguageModel, BaseOutputParser, OutputParserException + + +class OutputFixingParser(BaseOutputParser): + """Wraps a parser and tries to fix parsing errors.""" + + parser: BaseOutputParser + retry_chain: LLMChain + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + parser: BaseOutputParser, + prompt: BasePromptTemplate = NAIVE_FIX_PROMPT, + ) -> OutputFixingParser: + chain = LLMChain(llm=llm, prompt=prompt) + return cls(parser=parser, retry_chain=chain) + + def parse(self, completion: str) -> Any: + try: + parsed_completion = self.parser.parse(completion) + except OutputParserException as e: + new_completion = self.retry_chain.run( + instructions=self.parser.get_format_instructions(), + completion=completion, + error=repr(e), + ) + parsed_completion = self.parser.parse(new_completion) + + return parsed_completion + + def get_format_instructions(self) -> str: + return self.parser.get_format_instructions() diff --git 
a/langchain/output_parsers/format_instructions.py b/langchain/output_parsers/format_instructions.py new file mode 100644 index 0000000000000000000000000000000000000000..18894442a6f080620518b1e9e196432a71e5bf6d --- /dev/null +++ b/langchain/output_parsers/format_instructions.py @@ -0,0 +1,15 @@ +# flake8: noqa + +STRUCTURED_FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet formatted in the following schema: + +```json +{{ +{format} +}} +```""" + +PYDANTIC_FORMAT_INSTRUCTIONS = """Ensure the final answer be formatted as a JSON instance that conforms to the JSON schema below. +Here is the schema for the final answer: +``` +{schema} +```""" diff --git a/langchain/output_parsers/list.py b/langchain/output_parsers/list.py new file mode 100644 index 0000000000000000000000000000000000000000..32b26742d81d490a51b3ad643b55db2b71c66a25 --- /dev/null +++ b/langchain/output_parsers/list.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from abc import abstractmethod +from typing import List + +from langchain.schema import BaseOutputParser + + +class ListOutputParser(BaseOutputParser): + """Class to parse the output of an LLM call to a list.""" + + @abstractmethod + def parse(self, text: str) -> List[str]: + """Parse the output of an LLM call.""" + + +class CommaSeparatedListOutputParser(ListOutputParser): + """Parse out comma separated lists.""" + + def get_format_instructions(self) -> str: + return ( + "Your response should be a list of comma separated values, " + "eg: `foo, bar, baz`" + ) + + def parse(self, text: str) -> List[str]: + """Parse the output of an LLM call.""" + return text.strip().split(", ") diff --git a/langchain/output_parsers/loading.py b/langchain/output_parsers/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..7acd5aa95bbc49273951813bc8a8bd7e2ec22a0a --- /dev/null +++ b/langchain/output_parsers/loading.py @@ -0,0 +1,15 @@ +from langchain.output_parsers.regex import RegexParser + + +def load_output_parser(config: dict) -> dict: + """Load output parser.""" + if "output_parsers" in config: + if config["output_parsers"] is not None: + _config = config["output_parsers"] + output_parser_type = _config["_type"] + if output_parser_type == "regex_parser": + output_parser = RegexParser(**_config) + else: + raise ValueError(f"Unsupported output parser {output_parser_type}") + config["output_parsers"] = output_parser + return config diff --git a/langchain/output_parsers/prompts.py b/langchain/output_parsers/prompts.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea37b24a26a6a0b0330afceb81ab621f252ca95 --- /dev/null +++ b/langchain/output_parsers/prompts.py @@ -0,0 +1,22 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +NAIVE_FIX = """Instructions: +-------------- +{instructions} +-------------- +Completion: +-------------- +{completion} +-------------- + +Above, the Completion did not satisfy the constraints given in the Instructions. +Error: +-------------- +{error} +-------------- + +Please try again. 
Please only respond with an answer that satisfies the constraints laid out in the Instructions:""" + + +NAIVE_FIX_PROMPT = PromptTemplate.from_template(NAIVE_FIX) diff --git a/langchain/output_parsers/pydantic.py b/langchain/output_parsers/pydantic.py new file mode 100644 index 0000000000000000000000000000000000000000..228069afbdcb102ad346356d797cbd7e40c58a66 --- /dev/null +++ b/langchain/output_parsers/pydantic.py @@ -0,0 +1,61 @@ +import json +import re +from typing import Any, List, Tuple + +from pydantic import BaseModel, ValidationError, Field + +from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS +from langchain.schema import BaseOutputParser, OutputParserException + + +class SQLOutput(BaseModel): + sql_query: str = Field(description="sql query to get the final answer") + column_names: List[str] = Field(description="column names of the sql query output") + # query_result: List[Tuple[str]] = Field(description="the sql query's output, each tuple is a row of the output," + # "should match eactly the last observation's data") + chart_type: str = Field(description="the best chart type to visualize the sql query output," + "should be one of ['bar', 'line', 'pie', 'table'], " + "use line for timeseries data, " + "if there are more than 3 column names use table data, " + "use pie for percentage data") + + +class SQLThink(BaseModel): + thought: str = Field(description="think to get the final answer, you should always think about what to do") + clarification: str = Field(description="clarification question to the user if the analytics question is not clear") + plan: str = Field(description="plan to get the final answer, you should always plan before you take action") + + +class PydanticOutputParser(BaseOutputParser): + pydantic_object: Any + + def parse(self, text: str) -> BaseModel: + try: + # Greedy search for 1st json candidate. + match = re.search( + "\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL + ) + json_str = "" + if match: + json_str = match.group() + json_object = json.loads(json_str) + return self.pydantic_object.parse_obj(json_object) + + except (json.JSONDecodeError, ValidationError) as e: + name = self.pydantic_object.__name__ + msg = f"Failed to parse {name} from completion {text}. Got: {e}" + raise OutputParserException(msg) + + def get_format_instructions(self) -> str: + schema = self.pydantic_object.schema() + + # Remove extraneous fields. + reduced_schema = schema + if "title" in reduced_schema: + del reduced_schema["title"] + if "type" in reduced_schema: + del reduced_schema["type"] + # Ensure json in context is well-formed with double quotes. + schema = json.dumps(reduced_schema) + + return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema) diff --git a/langchain/output_parsers/rail_parser.py b/langchain/output_parsers/rail_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..0dab50d9a894cd2d83314c6cb1e899d09076f0a9 --- /dev/null +++ b/langchain/output_parsers/rail_parser.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import Any, Dict + +from langchain.schema import BaseOutputParser + + +class GuardrailsOutputParser(BaseOutputParser): + guard: Any + + @property + def _type(self) -> str: + return "guardrails" + + @classmethod + def from_rail(cls, rail_file: str, num_reasks: int = 1) -> GuardrailsOutputParser: + try: + from guardrails import Guard + except ImportError: + raise ValueError( + "guardrails-ai package not installed. 
" + "Install it by running `pip install guardrails-ai`." + ) + return cls(guard=Guard.from_rail(rail_file, num_reasks=num_reasks)) + + @classmethod + def from_rail_string( + cls, rail_str: str, num_reasks: int = 1 + ) -> GuardrailsOutputParser: + try: + from guardrails import Guard + except ImportError: + raise ValueError( + "guardrails-ai package not installed. " + "Install it by running `pip install guardrails-ai`." + ) + return cls(guard=Guard.from_rail_string(rail_str, num_reasks=num_reasks)) + + def get_format_instructions(self) -> str: + return self.guard.raw_prompt.format_instructions + + def parse(self, text: str) -> Dict: + return self.guard.parse(text) diff --git a/langchain/output_parsers/regex.py b/langchain/output_parsers/regex.py new file mode 100644 index 0000000000000000000000000000000000000000..dd03556bb62dd38c07af81282cd83fa94dfaf93c --- /dev/null +++ b/langchain/output_parsers/regex.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import re +from typing import Dict, List, Optional + +from pydantic import BaseModel + +from langchain.schema import BaseOutputParser + + +class RegexParser(BaseOutputParser, BaseModel): + """Class to parse the output into a dictionary.""" + + regex: str + output_keys: List[str] + default_output_key: Optional[str] = None + + @property + def _type(self) -> str: + """Return the type key.""" + return "regex_parser" + + def parse(self, text: str) -> Dict[str, str]: + """Parse the output of an LLM call.""" + match = re.search(self.regex, text) + if match: + return {key: match.group(i + 1) for i, key in enumerate(self.output_keys)} + else: + if self.default_output_key is None: + raise ValueError(f"Could not parse output: {text}") + else: + return { + key: text if key == self.default_output_key else "" + for key in self.output_keys + } diff --git a/langchain/output_parsers/regex_dict.py b/langchain/output_parsers/regex_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..d37f25640f4aa720a52890396d0b26e0477b2d7f --- /dev/null +++ b/langchain/output_parsers/regex_dict.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import re +from typing import Dict, Optional + +from pydantic import BaseModel + +from langchain.schema import BaseOutputParser + + +class RegexDictParser(BaseOutputParser, BaseModel): + """Class to parse the output into a dictionary.""" + + regex_pattern: str = r"{}:\s?([^.'\n']*)\.?" 
# : :meta private: + output_key_to_format: Dict[str, str] + no_update_value: Optional[str] = None + + @property + def _type(self) -> str: + """Return the type key.""" + return "regex_dict_parser" + + def parse(self, text: str) -> Dict[str, str]: + """Parse the output of an LLM call.""" + result = {} + for output_key, expected_format in self.output_key_to_format.items(): + specific_regex = self.regex_pattern.format(re.escape(expected_format)) + matches = re.findall(specific_regex, text) + if not matches: + raise ValueError( + f"No match found for output key: {output_key} with expected format \ + {expected_format} on text {text}" + ) + elif len(matches) > 1: + raise ValueError( + f"Multiple matches found for output key: {output_key} with \ + expected format {expected_format} on text {text}" + ) + elif ( + self.no_update_value is not None and matches[0] == self.no_update_value + ): + continue + else: + result[output_key] = matches[0] + return result diff --git a/langchain/output_parsers/retry.py b/langchain/output_parsers/retry.py new file mode 100644 index 0000000000000000000000000000000000000000..7c6760ea6f698748b6a7c2f8196bbc122f34e6f4 --- /dev/null +++ b/langchain/output_parsers/retry.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from typing import Any + +from langchain.chains.llm import LLMChain +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import ( + BaseLanguageModel, + BaseOutputParser, + OutputParserException, + PromptValue, +) + +NAIVE_COMPLETION_RETRY = """Prompt: +{prompt} +Completion: +{completion} + +Above, the Completion did not satisfy the constraints given in the Prompt. +Please try again:""" + +NAIVE_COMPLETION_RETRY_WITH_ERROR = """Prompt: +{prompt} +Completion: +{completion} + +Above, the Completion did not satisfy the constraints given in the Prompt. +Details: {error} +Please try again:""" + +NAIVE_RETRY_PROMPT = PromptTemplate.from_template(NAIVE_COMPLETION_RETRY) +NAIVE_RETRY_WITH_ERROR_PROMPT = PromptTemplate.from_template( + NAIVE_COMPLETION_RETRY_WITH_ERROR +) + + +class RetryOutputParser(BaseOutputParser): + """Wraps a parser and tries to fix parsing errors. + + Does this by passing the original prompt and the completion to another + LLM, and telling it the completion did not satisfy criteria in the prompt. + """ + + parser: BaseOutputParser + retry_chain: LLMChain + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + parser: BaseOutputParser, + prompt: BasePromptTemplate = NAIVE_RETRY_PROMPT, + ) -> RetryOutputParser: + chain = LLMChain(llm=llm, prompt=prompt) + return cls(parser=parser, retry_chain=chain) + + def parse_with_prompt(self, completion: str, prompt_value: PromptValue) -> Any: + try: + parsed_completion = self.parser.parse(completion) + except OutputParserException: + new_completion = self.retry_chain.run( + prompt=prompt_value.to_string(), completion=completion + ) + parsed_completion = self.parser.parse(new_completion) + + return parsed_completion + + def parse(self, completion: str) -> Any: + raise NotImplementedError( + "This OutputParser can only be called by the `parse_with_prompt` method." + ) + + def get_format_instructions(self) -> str: + return self.parser.get_format_instructions() + + +class RetryWithErrorOutputParser(BaseOutputParser): + """Wraps a parser and tries to fix parsing errors. 
+ + Does this by passing the original prompt, the completion, AND the error + that was raised to another language and telling it that the completion + did not work, and raised the given error. Differs from RetryOutputParser + in that this implementation provides the error that was raised back to the + LLM, which in theory should give it more information on how to fix it. + """ + + parser: BaseOutputParser + retry_chain: LLMChain + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + parser: BaseOutputParser, + prompt: BasePromptTemplate = NAIVE_RETRY_WITH_ERROR_PROMPT, + ) -> RetryWithErrorOutputParser: + chain = LLMChain(llm=llm, prompt=prompt) + return cls(parser=parser, retry_chain=chain) + + def parse_with_prompt(self, completion: str, prompt_value: PromptValue) -> Any: + try: + parsed_completion = self.parser.parse(completion) + except OutputParserException as e: + new_completion = self.retry_chain.run( + prompt=prompt_value.to_string(), completion=completion, error=repr(e) + ) + parsed_completion = self.parser.parse(new_completion) + + return parsed_completion + + def parse(self, completion: str) -> Any: + raise NotImplementedError( + "This OutputParser can only be called by the `parse_with_prompt` method." + ) + + def get_format_instructions(self) -> str: + return self.parser.get_format_instructions() diff --git a/langchain/output_parsers/structured.py b/langchain/output_parsers/structured.py new file mode 100644 index 0000000000000000000000000000000000000000..9921e7c69f268b5e481f8aca349c3442e0bb0a04 --- /dev/null +++ b/langchain/output_parsers/structured.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import json +from typing import List + +from pydantic import BaseModel + +from langchain.output_parsers.format_instructions import STRUCTURED_FORMAT_INSTRUCTIONS +from langchain.schema import BaseOutputParser, OutputParserException + +line_template = '\t"{name}": {type} // {description}' + + +class ResponseSchema(BaseModel): + name: str + description: str + + +def _get_sub_string(schema: ResponseSchema) -> str: + return line_template.format( + name=schema.name, description=schema.description, type="string" + ) + + +class StructuredOutputParser(BaseOutputParser): + response_schemas: List[ResponseSchema] + + @classmethod + def from_response_schemas( + cls, response_schemas: List[ResponseSchema] + ) -> StructuredOutputParser: + return cls(response_schemas=response_schemas) + + def get_format_instructions(self) -> str: + schema_str = "\n".join( + [_get_sub_string(schema) for schema in self.response_schemas] + ) + return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str) + + def parse(self, text: str) -> BaseModel: + json_string = text.split("```json")[1].strip().strip("```").strip() + json_obj = json.loads(json_string) + for schema in self.response_schemas: + if schema.name not in json_obj: + raise OutputParserException( + f"Got invalid return object. 
Expected key `{schema.name}` " + f"to be present, but got {json_obj}" + ) + return json_obj diff --git a/langchain/prompts/__init__.py b/langchain/prompts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aef564b60178344ec1ce802e4681df70cf672739 --- /dev/null +++ b/langchain/prompts/__init__.py @@ -0,0 +1,30 @@ +"""Prompt template classes.""" +from langchain.prompts.base import BasePromptTemplate, StringPromptTemplate +from langchain.prompts.chat import ( + AIMessagePromptTemplate, + ChatMessagePromptTemplate, + ChatPromptTemplate, + HumanMessagePromptTemplate, + MessagesPlaceholder, + SystemMessagePromptTemplate, +) +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.few_shot_with_templates import FewShotPromptWithTemplates +from langchain.prompts.loading import load_prompt +from langchain.prompts.prompt import Prompt, PromptTemplate + +__all__ = [ + "BasePromptTemplate", + "StringPromptTemplate", + "load_prompt", + "PromptTemplate", + "FewShotPromptTemplate", + "Prompt", + "FewShotPromptWithTemplates", + "ChatPromptTemplate", + "MessagesPlaceholder", + "HumanMessagePromptTemplate", + "AIMessagePromptTemplate", + "SystemMessagePromptTemplate", + "ChatMessagePromptTemplate", +] diff --git a/langchain/prompts/__pycache__/__init__.cpython-39.pyc b/langchain/prompts/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1c464c396468ec05cc5e20d84d5e31594483e35 Binary files /dev/null and b/langchain/prompts/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/base.cpython-39.pyc b/langchain/prompts/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ad86c6b35f8ee952a4459bf99af230cfdb86e9b Binary files /dev/null and b/langchain/prompts/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/chat.cpython-39.pyc b/langchain/prompts/__pycache__/chat.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcf453502e631bcd340609f6ae4991524274d65a Binary files /dev/null and b/langchain/prompts/__pycache__/chat.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/few_shot.cpython-39.pyc b/langchain/prompts/__pycache__/few_shot.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86e1485d9c1d9e722854bc100591fe8283b41dd8 Binary files /dev/null and b/langchain/prompts/__pycache__/few_shot.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/few_shot_with_templates.cpython-39.pyc b/langchain/prompts/__pycache__/few_shot_with_templates.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4caffb3e278c5d42bbb5301baed4bcd109318508 Binary files /dev/null and b/langchain/prompts/__pycache__/few_shot_with_templates.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/loading.cpython-39.pyc b/langchain/prompts/__pycache__/loading.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d797b45accb933a6c16ce710ddce3771bf27bca0 Binary files /dev/null and b/langchain/prompts/__pycache__/loading.cpython-39.pyc differ diff --git a/langchain/prompts/__pycache__/prompt.cpython-39.pyc b/langchain/prompts/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b03fadea055e6bf1039d141fbeac9b124199d25 Binary files /dev/null and b/langchain/prompts/__pycache__/prompt.cpython-39.pyc differ diff --git 
a/langchain/prompts/base.py b/langchain/prompts/base.py new file mode 100644 index 0000000000000000000000000000000000000000..166ac4d5f60084954d81ffe5b4ee2b2ded71fea6 --- /dev/null +++ b/langchain/prompts/base.py @@ -0,0 +1,197 @@ +"""BasePrompt schema definition.""" +from __future__ import annotations + +import json +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Callable, Dict, List, Mapping, Optional, Union + +import yaml +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.formatting import formatter +from langchain.schema import BaseMessage, BaseOutputParser, HumanMessage, PromptValue + + +def jinja2_formatter(template: str, **kwargs: Any) -> str: + """Format a template using jinja2.""" + try: + from jinja2 import Template + except ImportError: + raise ValueError( + "jinja2 not installed, which is needed to use the jinja2_formatter. " + "Please install it with `pip install jinja2`." + ) + + return Template(template).render(**kwargs) + + +DEFAULT_FORMATTER_MAPPING: Dict[str, Callable] = { + "f-string": formatter.format, + "jinja2": jinja2_formatter, +} + + +def check_valid_template( + template: str, template_format: str, input_variables: List[str] +) -> None: + """Check that template string is valid.""" + if template_format not in DEFAULT_FORMATTER_MAPPING: + valid_formats = list(DEFAULT_FORMATTER_MAPPING) + raise ValueError( + f"Invalid template format. Got `{template_format}`;" + f" should be one of {valid_formats}" + ) + dummy_inputs = {input_variable: "foo" for input_variable in input_variables} + try: + formatter_func = DEFAULT_FORMATTER_MAPPING[template_format] + formatter_func(template, **dummy_inputs) + except KeyError as e: + raise ValueError( + "Invalid prompt schema; check for mismatched or missing input parameters. " + + str(e) + ) + + +class StringPromptValue(PromptValue): + text: str + + def to_string(self) -> str: + """Return prompt as string.""" + return self.text + + def to_messages(self) -> List[BaseMessage]: + """Return prompt as messages.""" + return [HumanMessage(content=self.text)] + + +class BasePromptTemplate(BaseModel, ABC): + """Base class for all prompt templates, returning a prompt.""" + + input_variables: List[str] + """A list of the names of the variables the prompt template expects.""" + output_parser: Optional[BaseOutputParser] = None + """How to parse the output of calling an LLM on this formatted prompt.""" + partial_variables: Mapping[str, Union[str, Callable[[], str]]] = Field( + default_factory=dict + ) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @abstractmethod + def format_prompt(self, **kwargs: Any) -> PromptValue: + """Create Chat Messages.""" + + @root_validator() + def validate_variable_names(cls, values: Dict) -> Dict: + """Validate variable names do not include restricted names.""" + if "stop" in values["input_variables"]: + raise ValueError( + "Cannot have an input variable named 'stop', as it is used internally," + " please rename." + ) + if "stop" in values["partial_variables"]: + raise ValueError( + "Cannot have an partial variable named 'stop', as it is used " + "internally, please rename." 
+ ) + + overall = set(values["input_variables"]).intersection( + values["partial_variables"] + ) + if overall: + raise ValueError( + f"Found overlapping input and partial variables: {overall}" + ) + return values + + def partial(self, **kwargs: Union[str, Callable[[], str]]) -> BasePromptTemplate: + """Return a partial of the prompt template.""" + prompt_dict = self.__dict__.copy() + prompt_dict["input_variables"] = list( + set(self.input_variables).difference(kwargs) + ) + prompt_dict["partial_variables"] = {**self.partial_variables, **kwargs} + return type(self)(**prompt_dict) + + def _merge_partial_and_user_variables(self, **kwargs: Any) -> Dict[str, Any]: + # Get partial params: + partial_kwargs = { + k: v if isinstance(v, str) else v() + for k, v in self.partial_variables.items() + } + return {**partial_kwargs, **kwargs} + + @abstractmethod + def format(self, **kwargs: Any) -> str: + """Format the prompt with the inputs. + + Args: + kwargs: Any arguments to be passed to the prompt template. + + Returns: + A formatted string. + + Example: + + .. code-block:: python + + prompt.format(variable1="foo") + """ + + @property + @abstractmethod + def _prompt_type(self) -> str: + """Return the prompt type key.""" + + def dict(self, **kwargs: Any) -> Dict: + """Return dictionary representation of prompt.""" + prompt_dict = super().dict(**kwargs) + prompt_dict["_type"] = self._prompt_type + return prompt_dict + + def save(self, file_path: Union[Path, str]) -> None: + """Save the prompt. + + Args: + file_path: Path to directory to save prompt to. + + Example: + .. code-block:: python + + prompt.save(file_path="path/prompt.yaml") + """ + if self.partial_variables: + raise ValueError("Cannot save prompt with partial variables.") + # Convert file to Path object. 
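+ # The parent directory is created if missing, and the file suffix (.json or .yaml) selects the serialization format.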
+ if isinstance(file_path, str): + save_path = Path(file_path) + else: + save_path = file_path + + directory_path = save_path.parent + directory_path.mkdir(parents=True, exist_ok=True) + + # Fetch dictionary to save + prompt_dict = self.dict() + + if save_path.suffix == ".json": + with open(file_path, "w") as f: + json.dump(prompt_dict, f, indent=4) + elif save_path.suffix == ".yaml": + with open(file_path, "w") as f: + yaml.dump(prompt_dict, f, default_flow_style=False) + else: + raise ValueError(f"{save_path} must be json or yaml") + + +class StringPromptTemplate(BasePromptTemplate, ABC): + """String prompt should expose the format method, returning a prompt.""" + + def format_prompt(self, **kwargs: Any) -> PromptValue: + """Create Chat Messages.""" + return StringPromptValue(text=self.format(**kwargs)) diff --git a/langchain/prompts/chat.py b/langchain/prompts/chat.py new file mode 100644 index 0000000000000000000000000000000000000000..87f67a6d0a556bb941fdc72a42883eaac1fd71f2 --- /dev/null +++ b/langchain/prompts/chat.py @@ -0,0 +1,187 @@ +"""Chat prompt template.""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Callable, List, Sequence, Tuple, Type, Union + +from pydantic import BaseModel, Field + +from langchain.memory.buffer import get_buffer_string +from langchain.prompts.base import BasePromptTemplate, StringPromptTemplate +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import ( + AIMessage, + BaseMessage, + ChatMessage, + HumanMessage, + PromptValue, + SystemMessage, +) + + +class BaseMessagePromptTemplate(BaseModel, ABC): + @abstractmethod + def format_messages(self, **kwargs: Any) -> List[BaseMessage]: + """To messages.""" + + @property + @abstractmethod + def input_variables(self) -> List[str]: + """Input variables for this prompt template.""" + + +class MessagesPlaceholder(BaseMessagePromptTemplate): + """Prompt template that assumes variable is already list of messages.""" + + variable_name: str + + def format_messages(self, **kwargs: Any) -> List[BaseMessage]: + """To a BaseMessage.""" + value = kwargs[self.variable_name] + if not isinstance(value, list): + raise ValueError( + f"variable {self.variable_name} should be a list of base messages, " + f"got {value}" + ) + for v in value: + if not isinstance(v, BaseMessage): + raise ValueError( + f"variable {self.variable_name} should be a list of base messages," + f" got {value}" + ) + return value + + @property + def input_variables(self) -> List[str]: + """Input variables for this prompt template.""" + return [self.variable_name] + + +class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC): + prompt: StringPromptTemplate + additional_kwargs: dict = Field(default_factory=dict) + + @classmethod + def from_template(cls, template: str, **kwargs: Any) -> BaseMessagePromptTemplate: + prompt = PromptTemplate.from_template(template) + return cls(prompt=prompt, **kwargs) + + @abstractmethod + def format(self, **kwargs: Any) -> BaseMessage: + """To a BaseMessage.""" + + def format_messages(self, **kwargs: Any) -> List[BaseMessage]: + return [self.format(**kwargs)] + + @property + def input_variables(self) -> List[str]: + return self.prompt.input_variables + + +class ChatMessagePromptTemplate(BaseStringMessagePromptTemplate): + role: str + + def format(self, **kwargs: Any) -> BaseMessage: + text = self.prompt.format(**kwargs) + return ChatMessage( + content=text, role=self.role, additional_kwargs=self.additional_kwargs 
+ ) + + +class HumanMessagePromptTemplate(BaseStringMessagePromptTemplate): + def format(self, **kwargs: Any) -> BaseMessage: + text = self.prompt.format(**kwargs) + return HumanMessage(content=text, additional_kwargs=self.additional_kwargs) + + +class AIMessagePromptTemplate(BaseStringMessagePromptTemplate): + def format(self, **kwargs: Any) -> BaseMessage: + text = self.prompt.format(**kwargs) + return AIMessage(content=text, additional_kwargs=self.additional_kwargs) + + +class SystemMessagePromptTemplate(BaseStringMessagePromptTemplate): + def format(self, **kwargs: Any) -> BaseMessage: + text = self.prompt.format(**kwargs) + return SystemMessage(content=text, additional_kwargs=self.additional_kwargs) + + +class ChatPromptValue(PromptValue): + messages: List[BaseMessage] + + def to_string(self) -> str: + """Return prompt as string.""" + return get_buffer_string(self.messages) + + def to_messages(self) -> List[BaseMessage]: + """Return prompt as messages.""" + return self.messages + + +class ChatPromptTemplate(BasePromptTemplate, ABC): + input_variables: List[str] + messages: List[Union[BaseMessagePromptTemplate, BaseMessage]] + + @classmethod + def from_role_strings( + cls, string_messages: List[Tuple[str, str]] + ) -> ChatPromptTemplate: + messages = [ + ChatMessagePromptTemplate( + content=PromptTemplate.from_template(template), role=role + ) + for role, template in string_messages + ] + return cls.from_messages(messages) + + @classmethod + def from_strings( + cls, string_messages: List[Tuple[Type[BaseMessagePromptTemplate], str]] + ) -> ChatPromptTemplate: + messages = [ + role(content=PromptTemplate.from_template(template)) + for role, template in string_messages + ] + return cls.from_messages(messages) + + @classmethod + def from_messages( + cls, messages: Sequence[Union[BaseMessagePromptTemplate, BaseMessage]] + ) -> ChatPromptTemplate: + input_vars = set() + for message in messages: + if isinstance(message, BaseMessagePromptTemplate): + input_vars.update(message.input_variables) + return cls(input_variables=list(input_vars), messages=messages) + + def format(self, **kwargs: Any) -> str: + return self.format_prompt(**kwargs).to_string() + + def format_prompt(self, **kwargs: Any) -> PromptValue: + kwargs = self._merge_partial_and_user_variables(**kwargs) + result = [] + for message_template in self.messages: + if isinstance(message_template, BaseMessage): + result.extend([message_template]) + elif isinstance(message_template, BaseMessagePromptTemplate): + rel_params = { + k: v + for k, v in kwargs.items() + if k in message_template.input_variables + } + message = message_template.format_messages(**rel_params) + result.extend(message) + else: + raise ValueError(f"Unexpected input: {message_template}") + return ChatPromptValue(messages=result) + + def partial(self, **kwargs: Union[str, Callable[[], str]]) -> BasePromptTemplate: + raise NotImplementedError + + @property + def _prompt_type(self) -> str: + raise NotImplementedError + + def save(self, file_path: Union[Path, str]) -> None: + raise NotImplementedError diff --git a/langchain/prompts/example_selector/__init__.py b/langchain/prompts/example_selector/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6074488dabc2481c5d70c5e4a504fb8c4d30c90 --- /dev/null +++ b/langchain/prompts/example_selector/__init__.py @@ -0,0 +1,12 @@ +"""Logic for selecting examples to include in prompts.""" +from langchain.prompts.example_selector.length_based import LengthBasedExampleSelector +from 
langchain.prompts.example_selector.semantic_similarity import ( + MaxMarginalRelevanceExampleSelector, + SemanticSimilarityExampleSelector, +) + +__all__ = [ + "LengthBasedExampleSelector", + "SemanticSimilarityExampleSelector", + "MaxMarginalRelevanceExampleSelector", +] diff --git a/langchain/prompts/example_selector/__pycache__/__init__.cpython-39.pyc b/langchain/prompts/example_selector/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b69057d9f4473ae27219ed78b653bd024fe4b3ff Binary files /dev/null and b/langchain/prompts/example_selector/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/prompts/example_selector/__pycache__/base.cpython-39.pyc b/langchain/prompts/example_selector/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a3d9773ea649f9ef3cb434a5dea196258358d73 Binary files /dev/null and b/langchain/prompts/example_selector/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/prompts/example_selector/__pycache__/length_based.cpython-39.pyc b/langchain/prompts/example_selector/__pycache__/length_based.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8264606c9b5d25940e70cdbd148a22f9376154bf Binary files /dev/null and b/langchain/prompts/example_selector/__pycache__/length_based.cpython-39.pyc differ diff --git a/langchain/prompts/example_selector/__pycache__/semantic_similarity.cpython-39.pyc b/langchain/prompts/example_selector/__pycache__/semantic_similarity.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d9e0f77bf922b70c5ef7ce3e34845cccc9ce74b Binary files /dev/null and b/langchain/prompts/example_selector/__pycache__/semantic_similarity.cpython-39.pyc differ diff --git a/langchain/prompts/example_selector/base.py b/langchain/prompts/example_selector/base.py new file mode 100644 index 0000000000000000000000000000000000000000..ff2e099c8102f06f42208c08ecae2d069726c05f --- /dev/null +++ b/langchain/prompts/example_selector/base.py @@ -0,0 +1,15 @@ +"""Interface for selecting examples to include in prompts.""" +from abc import ABC, abstractmethod +from typing import Any, Dict, List + + +class BaseExampleSelector(ABC): + """Interface for selecting examples to include in prompts.""" + + @abstractmethod + def add_example(self, example: Dict[str, str]) -> Any: + """Add new example to store for a key.""" + + @abstractmethod + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Select which examples to use based on the inputs.""" diff --git a/langchain/prompts/example_selector/length_based.py b/langchain/prompts/example_selector/length_based.py new file mode 100644 index 0000000000000000000000000000000000000000..f6c665de322bf276824c3ab3430e6fdcfec7576a --- /dev/null +++ b/langchain/prompts/example_selector/length_based.py @@ -0,0 +1,64 @@ +"""Select examples based on length.""" +import re +from typing import Callable, Dict, List + +from pydantic import BaseModel, validator + +from langchain.prompts.example_selector.base import BaseExampleSelector +from langchain.prompts.prompt import PromptTemplate + + +def _get_length_based(text: str) -> int: + return len(re.split("\n| ", text)) + + +class LengthBasedExampleSelector(BaseExampleSelector, BaseModel): + """Select examples based on length.""" + + examples: List[dict] + """A list of the examples that the prompt template expects.""" + + example_prompt: PromptTemplate + """Prompt template used to format the 
examples.""" + + get_text_length: Callable[[str], int] = _get_length_based + """Function to measure prompt length. Defaults to word count.""" + + max_length: int = 2048 + """Max length for the prompt, beyond which examples are cut.""" + + example_text_lengths: List[int] = [] #: :meta private: + + def add_example(self, example: Dict[str, str]) -> None: + """Add new example to list.""" + self.examples.append(example) + string_example = self.example_prompt.format(**example) + self.example_text_lengths.append(self.get_text_length(string_example)) + + @validator("example_text_lengths", always=True) + def calculate_example_text_lengths(cls, v: List[int], values: Dict) -> List[int]: + """Calculate text lengths if they don't exist.""" + # Check if text lengths were passed in + if v: + return v + # If they were not, calculate them + example_prompt = values["example_prompt"] + get_text_length = values["get_text_length"] + string_examples = [example_prompt.format(**eg) for eg in values["examples"]] + return [get_text_length(eg) for eg in string_examples] + + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Select which examples to use based on the input lengths.""" + inputs = " ".join(input_variables.values()) + remaining_length = self.max_length - self.get_text_length(inputs) + i = 0 + examples = [] + while remaining_length > 0 and i < len(self.examples): + new_length = remaining_length - self.example_text_lengths[i] + if new_length < 0: + break + else: + examples.append(self.examples[i]) + remaining_length = new_length + i += 1 + return examples diff --git a/langchain/prompts/example_selector/ngram_overlap.py b/langchain/prompts/example_selector/ngram_overlap.py new file mode 100644 index 0000000000000000000000000000000000000000..335331ec1bf7104000a1b54aea47e6024f8c25d9 --- /dev/null +++ b/langchain/prompts/example_selector/ngram_overlap.py @@ -0,0 +1,112 @@ +"""Select and order examples based on ngram overlap score (sentence_bleu score). + +https://www.nltk.org/_modules/nltk/translate/bleu_score.html +https://aclanthology.org/P02-1040.pdf +""" +from typing import Dict, List + +import numpy as np +from pydantic import BaseModel, root_validator + +from langchain.prompts.example_selector.base import BaseExampleSelector +from langchain.prompts.prompt import PromptTemplate + + +def ngram_overlap_score(source: List[str], example: List[str]) -> float: + """Compute ngram overlap score of source and example as sentence_bleu score. + + Use sentence_bleu with method1 smoothing function and auto reweighting. + Return float value between 0.0 and 1.0 inclusive. + https://www.nltk.org/_modules/nltk/translate/bleu_score.html + https://aclanthology.org/P02-1040.pdf + """ + from nltk.translate.bleu_score import ( # type: ignore + SmoothingFunction, + sentence_bleu, + ) + + hypotheses = source[0].split() + references = [s.split() for s in example] + + return float( + sentence_bleu( + references, + hypotheses, + smoothing_function=SmoothingFunction().method1, + auto_reweigh=True, + ) + ) + + +class NGramOverlapExampleSelector(BaseExampleSelector, BaseModel): + """Select and order examples based on ngram overlap score (sentence_bleu score). 
+ + https://www.nltk.org/_modules/nltk/translate/bleu_score.html + https://aclanthology.org/P02-1040.pdf + """ + + examples: List[dict] + """A list of the examples that the prompt template expects.""" + + example_prompt: PromptTemplate + """Prompt template used to format the examples.""" + + threshold: float = -1.0 + """Threshold at which algorithm stops. Set to -1.0 by default. + + For negative threshold: + select_examples sorts examples by ngram_overlap_score, but excludes none. + For threshold greater than 1.0: + select_examples excludes all examples, and returns an empty list. + For threshold equal to 0.0: + select_examples sorts examples by ngram_overlap_score, + and excludes examples with no ngram overlap with input. + """ + + @root_validator(pre=True) + def check_dependencies(cls, values: Dict) -> Dict: + """Check that valid dependencies exist.""" + try: + from nltk.translate.bleu_score import ( # noqa: disable=F401 + SmoothingFunction, + sentence_bleu, + ) + except ImportError as e: + raise ValueError( + "Not all the correct dependencies for this ExampleSelect exist" + ) from e + + return values + + def add_example(self, example: Dict[str, str]) -> None: + """Add new example to list.""" + self.examples.append(example) + + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Return list of examples sorted by ngram_overlap_score with input. + + Descending order. + Excludes any examples with ngram_overlap_score less than or equal to threshold. + """ + inputs = list(input_variables.values()) + examples = [] + k = len(self.examples) + score = [0.0] * k + first_prompt_template_key = self.example_prompt.input_variables[0] + + for i in range(k): + score[i] = ngram_overlap_score( + inputs, [self.examples[i][first_prompt_template_key]] + ) + + while True: + arg_max = np.argmax(score) + if (score[arg_max] < self.threshold) or abs( + score[arg_max] - self.threshold + ) < 1e-9: + break + + examples.append(self.examples[arg_max]) + score[arg_max] = self.threshold - 1.0 + + return examples diff --git a/langchain/prompts/example_selector/semantic_similarity.py b/langchain/prompts/example_selector/semantic_similarity.py new file mode 100644 index 0000000000000000000000000000000000000000..604a04e69c343a9f3d86532c508478d2772bf84c --- /dev/null +++ b/langchain/prompts/example_selector/semantic_similarity.py @@ -0,0 +1,166 @@ +"""Example selector that selects examples based on SemanticSimilarity.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Type + +from pydantic import BaseModel, Extra + +from langchain.embeddings.base import Embeddings +from langchain.prompts.example_selector.base import BaseExampleSelector +from langchain.vectorstores.base import VectorStore + + +def sorted_values(values: Dict[str, str]) -> List[Any]: + """Return a list of values in dict sorted by key.""" + return [values[val] for val in sorted(values)] + + +class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): + """Example selector that selects examples based on SemanticSimilarity.""" + + vectorstore: VectorStore + """VectorStore than contains information about examples.""" + k: int = 4 + """Number of examples to select.""" + example_keys: Optional[List[str]] = None + """Optional keys to filter examples to.""" + input_keys: Optional[List[str]] = None + """Optional keys to filter input to. 
If provided, the search is based on + the input variables instead of all variables.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def add_example(self, example: Dict[str, str]) -> str: + """Add new example to vectorstore.""" + if self.input_keys: + string_example = " ".join( + sorted_values({key: example[key] for key in self.input_keys}) + ) + else: + string_example = " ".join(sorted_values(example)) + ids = self.vectorstore.add_texts([string_example], metadatas=[example]) + return ids[0] + + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Select which examples to use based on semantic similarity.""" + # Get the docs with the highest similarity. + if self.input_keys: + input_variables = {key: input_variables[key] for key in self.input_keys} + query = " ".join(sorted_values(input_variables)) + example_docs = self.vectorstore.similarity_search(query, k=self.k) + # Get the examples from the metadata. + # This assumes that examples are stored in metadata. + examples = [dict(e.metadata) for e in example_docs] + # If example keys are provided, filter examples to those keys. + if self.example_keys: + examples = [{k: eg[k] for k in self.example_keys} for eg in examples] + return examples + + @classmethod + def from_examples( + cls, + examples: List[dict], + embeddings: Embeddings, + vectorstore_cls: Type[VectorStore], + k: int = 4, + input_keys: Optional[List[str]] = None, + **vectorstore_cls_kwargs: Any, + ) -> SemanticSimilarityExampleSelector: + """Create k-shot example selector using example list and embeddings. + + Reshuffles examples dynamically based on query similarity. + + Args: + examples: List of examples to use in the prompt. + embeddings: An iniialized embedding API interface, e.g. OpenAIEmbeddings(). + vectorstore_cls: A vector store DB interface class, e.g. FAISS. + k: Number of examples to select + input_keys: If provided, the search is based on the input variables + instead of all variables. + vectorstore_cls_kwargs: optional kwargs containing url for vector store + + Returns: + The ExampleSelector instantiated, backed by a vector store. + """ + if input_keys: + string_examples = [ + " ".join(sorted_values({k: eg[k] for k in input_keys})) + for eg in examples + ] + else: + string_examples = [" ".join(sorted_values(eg)) for eg in examples] + vectorstore = vectorstore_cls.from_texts( + string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs + ) + return cls(vectorstore=vectorstore, k=k, input_keys=input_keys) + + +class MaxMarginalRelevanceExampleSelector(SemanticSimilarityExampleSelector, BaseModel): + """ExampleSelector that selects examples based on Max Marginal Relevance. + + This was shown to improve performance in this paper: + https://arxiv.org/pdf/2211.13892.pdf + """ + + fetch_k: int = 20 + """Number of examples to fetch to rerank.""" + + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Select which examples to use based on semantic similarity.""" + # Get the docs with the highest similarity. + if self.input_keys: + input_variables = {key: input_variables[key] for key in self.input_keys} + query = " ".join(sorted_values(input_variables)) + example_docs = self.vectorstore.max_marginal_relevance_search( + query, k=self.k, fetch_k=self.fetch_k + ) + # Get the examples from the metadata. + # This assumes that examples are stored in metadata. 
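        # max_marginal_relevance_search is expected to fetch `fetch_k` candidates by
        # embedding similarity and then rerank them so that the `k` documents returned
        # balance relevance to the query against diversity among themselves; each
        # Document carries the original example dict in its metadata, unpacked below.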
+ examples = [dict(e.metadata) for e in example_docs] + # If example keys are provided, filter examples to those keys. + if self.example_keys: + examples = [{k: eg[k] for k in self.example_keys} for eg in examples] + return examples + + @classmethod + def from_examples( + cls, + examples: List[dict], + embeddings: Embeddings, + vectorstore_cls: Type[VectorStore], + k: int = 4, + input_keys: Optional[List[str]] = None, + fetch_k: int = 20, + **vectorstore_cls_kwargs: Any, + ) -> MaxMarginalRelevanceExampleSelector: + """Create k-shot example selector using example list and embeddings. + + Reshuffles examples dynamically based on query similarity. + + Args: + examples: List of examples to use in the prompt. + embeddings: An iniialized embedding API interface, e.g. OpenAIEmbeddings(). + vectorstore_cls: A vector store DB interface class, e.g. FAISS. + k: Number of examples to select + input_keys: If provided, the search is based on the input variables + instead of all variables. + vectorstore_cls_kwargs: optional kwargs containing url for vector store + + Returns: + The ExampleSelector instantiated, backed by a vector store. + """ + if input_keys: + string_examples = [ + " ".join(sorted_values({k: eg[k] for k in input_keys})) + for eg in examples + ] + else: + string_examples = [" ".join(sorted_values(eg)) for eg in examples] + vectorstore = vectorstore_cls.from_texts( + string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs + ) + return cls(vectorstore=vectorstore, k=k, fetch_k=fetch_k, input_keys=input_keys) diff --git a/langchain/prompts/few_shot.py b/langchain/prompts/few_shot.py new file mode 100644 index 0000000000000000000000000000000000000000..3b0656b05dd953ace57f4d4525ea3465e8ed768a --- /dev/null +++ b/langchain/prompts/few_shot.py @@ -0,0 +1,127 @@ +"""Prompt template that contains few shot examples.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.prompts.base import ( + DEFAULT_FORMATTER_MAPPING, + StringPromptTemplate, + check_valid_template, +) +from langchain.prompts.example_selector.base import BaseExampleSelector +from langchain.prompts.prompt import PromptTemplate + + +class FewShotPromptTemplate(StringPromptTemplate, BaseModel): + """Prompt template that contains few shot examples.""" + + examples: Optional[List[dict]] = None + """Examples to format into the prompt. + Either this or example_selector should be provided.""" + + example_selector: Optional[BaseExampleSelector] = None + """ExampleSelector to choose the examples to format into the prompt. + Either this or examples should be provided.""" + + example_prompt: PromptTemplate + """PromptTemplate used to format an individual example.""" + + suffix: str + """A prompt template string to put after the examples.""" + + input_variables: List[str] + """A list of the names of the variables the prompt template expects.""" + + example_separator: str = "\n\n" + """String separator used to join the prefix, the examples, and suffix.""" + + prefix: str = "" + """A prompt template string to put before the examples.""" + + template_format: str = "f-string" + """The format of the prompt template. 
Options are: 'f-string', 'jinja2'.""" + + validate_template: bool = True + """Whether or not to try validating the template.""" + + @root_validator(pre=True) + def check_examples_and_selector(cls, values: Dict) -> Dict: + """Check that one and only one of examples/example_selector are provided.""" + examples = values.get("examples", None) + example_selector = values.get("example_selector", None) + if examples and example_selector: + raise ValueError( + "Only one of 'examples' and 'example_selector' should be provided" + ) + + if examples is None and example_selector is None: + raise ValueError( + "One of 'examples' and 'example_selector' should be provided" + ) + + return values + + @root_validator() + def template_is_valid(cls, values: Dict) -> Dict: + """Check that prefix, suffix and input variables are consistent.""" + if values["validate_template"]: + check_valid_template( + values["prefix"] + values["suffix"], + values["template_format"], + values["input_variables"] + list(values["partial_variables"]), + ) + return values + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def _get_examples(self, **kwargs: Any) -> List[dict]: + if self.examples is not None: + return self.examples + elif self.example_selector is not None: + return self.example_selector.select_examples(kwargs) + else: + raise ValueError + + def format(self, **kwargs: Any) -> str: + """Format the prompt with the inputs. + + Args: + kwargs: Any arguments to be passed to the prompt template. + + Returns: + A formatted string. + + Example: + + .. code-block:: python + + prompt.format(variable1="foo") + """ + kwargs = self._merge_partial_and_user_variables(**kwargs) + # Get the examples to use. + examples = self._get_examples(**kwargs) + # Format the examples. + example_strings = [ + self.example_prompt.format(**example) for example in examples + ] + # Create the overall template. + pieces = [self.prefix, *example_strings, self.suffix] + template = self.example_separator.join([piece for piece in pieces if piece]) + + # Format the template with the input variables. + return DEFAULT_FORMATTER_MAPPING[self.template_format](template, **kwargs) + + @property + def _prompt_type(self) -> str: + """Return the prompt type key.""" + return "few_shot" + + def dict(self, **kwargs: Any) -> Dict: + """Return a dictionary of the prompt.""" + if self.example_selector: + raise ValueError("Saving an example selector is not currently supported") + return super().dict(**kwargs) diff --git a/langchain/prompts/few_shot_with_templates.py b/langchain/prompts/few_shot_with_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..c37dd19dc99cbfc03d294a0bc99f3975bf9c87fe --- /dev/null +++ b/langchain/prompts/few_shot_with_templates.py @@ -0,0 +1,151 @@ +"""Prompt template that contains few shot examples.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.prompts.base import ( + DEFAULT_FORMATTER_MAPPING, + StringPromptTemplate, +) +from langchain.prompts.example_selector.base import BaseExampleSelector +from langchain.prompts.prompt import PromptTemplate + + +class FewShotPromptWithTemplates(StringPromptTemplate, BaseModel): + """Prompt template that contains few shot examples.""" + + examples: Optional[List[dict]] = None + """Examples to format into the prompt. 
+ Either this or example_selector should be provided.""" + + example_selector: Optional[BaseExampleSelector] = None + """ExampleSelector to choose the examples to format into the prompt. + Either this or examples should be provided.""" + + example_prompt: PromptTemplate + """PromptTemplate used to format an individual example.""" + + suffix: StringPromptTemplate + """A PromptTemplate to put after the examples.""" + + input_variables: List[str] + """A list of the names of the variables the prompt template expects.""" + + example_separator: str = "\n\n" + """String separator used to join the prefix, the examples, and suffix.""" + + prefix: Optional[StringPromptTemplate] = None + """A PromptTemplate to put before the examples.""" + + template_format: str = "f-string" + """The format of the prompt template. Options are: 'f-string', 'jinja2'.""" + + validate_template: bool = True + """Whether or not to try validating the template.""" + + @root_validator(pre=True) + def check_examples_and_selector(cls, values: Dict) -> Dict: + """Check that one and only one of examples/example_selector are provided.""" + examples = values.get("examples", None) + example_selector = values.get("example_selector", None) + if examples and example_selector: + raise ValueError( + "Only one of 'examples' and 'example_selector' should be provided" + ) + + if examples is None and example_selector is None: + raise ValueError( + "One of 'examples' and 'example_selector' should be provided" + ) + + return values + + @root_validator() + def template_is_valid(cls, values: Dict) -> Dict: + """Check that prefix, suffix and input variables are consistent.""" + if values["validate_template"]: + input_variables = values["input_variables"] + expected_input_variables = set(values["suffix"].input_variables) + expected_input_variables |= set(values["partial_variables"]) + if values["prefix"] is not None: + expected_input_variables |= set(values["prefix"].input_variables) + missing_vars = expected_input_variables.difference(input_variables) + if missing_vars: + raise ValueError( + f"Got input_variables={input_variables}, but based on " + f"prefix/suffix expected {expected_input_variables}" + ) + return values + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def _get_examples(self, **kwargs: Any) -> List[dict]: + if self.examples is not None: + return self.examples + elif self.example_selector is not None: + return self.example_selector.select_examples(kwargs) + else: + raise ValueError + + def format(self, **kwargs: Any) -> str: + """Format the prompt with the inputs. + + Args: + kwargs: Any arguments to be passed to the prompt template. + + Returns: + A formatted string. + + Example: + + .. code-block:: python + + prompt.format(variable1="foo") + """ + kwargs = self._merge_partial_and_user_variables(**kwargs) + # Get the examples to use. + examples = self._get_examples(**kwargs) + # Format the examples. + example_strings = [ + self.example_prompt.format(**example) for example in examples + ] + # Create the overall prefix. 
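        # The prefix and suffix are themselves prompt templates, so each is formatted
        # with only the kwargs it declares; those keys are popped from `kwargs` so that
        # only the remaining variables are passed to the final formatter call below.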
+ if self.prefix is None: + prefix = "" + else: + prefix_kwargs = { + k: v for k, v in kwargs.items() if k in self.prefix.input_variables + } + for k in prefix_kwargs.keys(): + kwargs.pop(k) + prefix = self.prefix.format(**prefix_kwargs) + + # Create the overall suffix + suffix_kwargs = { + k: v for k, v in kwargs.items() if k in self.suffix.input_variables + } + for k in suffix_kwargs.keys(): + kwargs.pop(k) + suffix = self.suffix.format( + **suffix_kwargs, + ) + + pieces = [prefix, *example_strings, suffix] + template = self.example_separator.join([piece for piece in pieces if piece]) + # Format the template with the input variables. + return DEFAULT_FORMATTER_MAPPING[self.template_format](template, **kwargs) + + @property + def _prompt_type(self) -> str: + """Return the prompt type key.""" + return "few_shot_with_templates" + + def dict(self, **kwargs: Any) -> Dict: + """Return a dictionary of the prompt.""" + if self.example_selector: + raise ValueError("Saving an example selector is not currently supported") + return super().dict(**kwargs) diff --git a/langchain/prompts/loading.py b/langchain/prompts/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..c8492979248e98a40ab36d62e6b9fffbaab12d53 --- /dev/null +++ b/langchain/prompts/loading.py @@ -0,0 +1,164 @@ +"""Load prompts from disk.""" +import importlib +import json +import logging +from pathlib import Path +from typing import Union + +import yaml + +from langchain.output_parsers.regex import RegexParser +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate +from langchain.utilities.loading import try_load_from_hub + +URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/prompts/" +logger = logging.getLogger(__file__) + + +def load_prompt_from_config(config: dict) -> BasePromptTemplate: + """Load prompt from Config Dict.""" + if "_type" not in config: + logger.warning("No `_type` key found, defaulting to `prompt`.") + config_type = config.pop("_type", "prompt") + + if config_type not in type_to_loader_dict: + raise ValueError(f"Loading {config_type} prompt not supported") + + prompt_loader = type_to_loader_dict[config_type] + return prompt_loader(config) + + +def _load_template(var_name: str, config: dict) -> dict: + """Load template from disk if applicable.""" + # Check if template_path exists in config. + if f"{var_name}_path" in config: + # If it does, make sure template variable doesn't also exist. + if var_name in config: + raise ValueError( + f"Both `{var_name}_path` and `{var_name}` cannot be provided." + ) + # Pop the template path from the config. + template_path = Path(config.pop(f"{var_name}_path")) + # Load the template. + if template_path.suffix == ".txt": + with open(template_path) as f: + template = f.read() + else: + raise ValueError + # Set the template variable to the extracted variable. + config[var_name] = template + return config + + +def _load_examples(config: dict) -> dict: + """Load examples if necessary.""" + if isinstance(config["examples"], list): + pass + elif isinstance(config["examples"], str): + with open(config["examples"]) as f: + if config["examples"].endswith(".json"): + examples = json.load(f) + elif config["examples"].endswith((".yaml", ".yml")): + examples = yaml.safe_load(f) + else: + raise ValueError( + "Invalid file format. Only json or yaml formats are supported." 
+ ) + config["examples"] = examples + else: + raise ValueError("Invalid examples format. Only list or string are supported.") + return config + + +def _load_output_parser(config: dict) -> dict: + """Load output parser.""" + if "output_parsers" in config: + if config["output_parsers"] is not None: + _config = config["output_parsers"] + output_parser_type = _config["_type"] + if output_parser_type == "regex_parser": + output_parser = RegexParser(**_config) + else: + raise ValueError(f"Unsupported output parser {output_parser_type}") + config["output_parsers"] = output_parser + return config + + +def _load_few_shot_prompt(config: dict) -> FewShotPromptTemplate: + """Load the few shot prompt from the config.""" + # Load the suffix and prefix templates. + config = _load_template("suffix", config) + config = _load_template("prefix", config) + # Load the example prompt. + if "example_prompt_path" in config: + if "example_prompt" in config: + raise ValueError( + "Only one of example_prompt and example_prompt_path should " + "be specified." + ) + config["example_prompt"] = load_prompt(config.pop("example_prompt_path")) + else: + config["example_prompt"] = load_prompt_from_config(config["example_prompt"]) + # Load the examples. + config = _load_examples(config) + config = _load_output_parser(config) + return FewShotPromptTemplate(**config) + + +def _load_prompt(config: dict) -> PromptTemplate: + """Load the prompt template from config.""" + # Load the template from disk if necessary. + config = _load_template("template", config) + config = _load_output_parser(config) + return PromptTemplate(**config) + + +def load_prompt(path: Union[str, Path]) -> BasePromptTemplate: + """Unified method for loading a prompt from LangChainHub or local fs.""" + if hub_result := try_load_from_hub( + path, _load_prompt_from_file, "prompts", {"py", "json", "yaml"} + ): + return hub_result + else: + return _load_prompt_from_file(path) + + +def _load_prompt_from_file(file: Union[str, Path]) -> BasePromptTemplate: + """Load prompt from file.""" + # Convert file to Path object. + if isinstance(file, str): + file_path = Path(file) + else: + file_path = file + # Load from either json or yaml. + if file_path.suffix == ".json": + with open(file_path) as f: + config = json.load(f) + elif file_path.suffix == ".yaml": + with open(file_path, "r") as f: + config = yaml.safe_load(f) + elif file_path.suffix == ".py": + spec = importlib.util.spec_from_loader( + "prompt", loader=None, origin=str(file_path) + ) + if spec is None: + raise ValueError("could not load spec") + helper = importlib.util.module_from_spec(spec) + with open(file_path, "rb") as f: + exec(f.read(), helper.__dict__) + if not isinstance(helper.PROMPT, BasePromptTemplate): + raise ValueError("Did not get object of type BasePromptTemplate.") + return helper.PROMPT + else: + raise ValueError(f"Got unsupported file type {file_path.suffix}") + # Load the prompt from the config now. 
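    # Only the .json/.yaml branches reach this point: the .py branch returns the
    # module-level PROMPT object directly, and unsupported suffixes raise above.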
+ return load_prompt_from_config(config) + + +type_to_loader_dict = { + "prompt": _load_prompt, + "few_shot": _load_few_shot_prompt, + # "few_shot_with_templates": _load_few_shot_with_templates_prompt, +} diff --git a/langchain/prompts/prompt.py b/langchain/prompts/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..f2cf0aed5a7f9b741df0dcca9b288a38c539da0d --- /dev/null +++ b/langchain/prompts/prompt.py @@ -0,0 +1,134 @@ +"""Prompt schema definition.""" +from __future__ import annotations + +from pathlib import Path +from string import Formatter +from typing import Any, Dict, List, Union + +from pydantic import BaseModel, Extra, root_validator + +from langchain.prompts.base import ( + DEFAULT_FORMATTER_MAPPING, + StringPromptTemplate, + check_valid_template, +) + + +class PromptTemplate(StringPromptTemplate, BaseModel): + """Schema to represent a prompt for an LLM. + + Example: + .. code-block:: python + + from langchain import PromptTemplate + prompt = PromptTemplate(input_variables=["foo"], template="Say {foo}") + """ + + input_variables: List[str] + """A list of the names of the variables the prompt template expects.""" + + template: str + """The prompt template.""" + + template_format: str = "f-string" + """The format of the prompt template. Options are: 'f-string', 'jinja2'.""" + + validate_template: bool = True + """Whether or not to try validating the template.""" + + @property + def _prompt_type(self) -> str: + """Return the prompt type key.""" + return "prompt" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def format(self, **kwargs: Any) -> str: + """Format the prompt with the inputs. + + Args: + kwargs: Any arguments to be passed to the prompt template. + + Returns: + A formatted string. + + Example: + + .. code-block:: python + + prompt.format(variable1="foo") + """ + kwargs = self._merge_partial_and_user_variables(**kwargs) + return DEFAULT_FORMATTER_MAPPING[self.template_format](self.template, **kwargs) + + @root_validator() + def template_is_valid(cls, values: Dict) -> Dict: + """Check that template and input variables are consistent.""" + if values["validate_template"]: + all_inputs = values["input_variables"] + list(values["partial_variables"]) + check_valid_template( + values["template"], values["template_format"], all_inputs + ) + return values + + @classmethod + def from_examples( + cls, + examples: List[str], + suffix: str, + input_variables: List[str], + example_separator: str = "\n\n", + prefix: str = "", + ) -> PromptTemplate: + """Take examples in list format with prefix and suffix to create a prompt. + + Intended be used as a way to dynamically create a prompt from examples. + + Args: + examples: List of examples to use in the prompt. + suffix: String to go after the list of examples. Should generally + set up the user's input. + input_variables: A list of variable names the final prompt template + will expect. + example_separator: The separator to use in between examples. Defaults + to two new line characters. + prefix: String that should go before any examples. Generally includes + examples. Default to an empty string. + + Returns: + The final prompt generated. + """ + template = example_separator.join([prefix, *examples, suffix]) + return cls(input_variables=input_variables, template=template) + + @classmethod + def from_file( + cls, template_file: Union[str, Path], input_variables: List[str] + ) -> PromptTemplate: + """Load a prompt from a file. 
+ + Args: + template_file: The path to the file containing the prompt template. + input_variables: A list of variable names the final prompt template + will expect. + Returns: + The prompt loaded from the file. + """ + with open(str(template_file), "r") as f: + template = f.read() + return cls(input_variables=input_variables, template=template) + + @classmethod + def from_template(cls, template: str) -> PromptTemplate: + """Load a prompt template from a template.""" + input_variables = { + v for _, v, _, _ in Formatter().parse(template) if v is not None + } + return cls(input_variables=list(sorted(input_variables)), template=template) + + +# For backwards compatibility. +Prompt = PromptTemplate diff --git a/langchain/py.typed b/langchain/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/langchain/python.py b/langchain/python.py new file mode 100644 index 0000000000000000000000000000000000000000..a14bc2e13e9850c26dcc69901a63335d625a3806 --- /dev/null +++ b/langchain/python.py @@ -0,0 +1,26 @@ +"""Mock Python REPL.""" +import sys +from io import StringIO +from typing import Dict, Optional + +from pydantic import BaseModel, Field + + +class PythonREPL(BaseModel): + """Simulates a standalone Python REPL.""" + + globals: Optional[Dict] = Field(default_factory=dict, alias="_globals") + locals: Optional[Dict] = Field(default_factory=dict, alias="_locals") + + def run(self, command: str) -> str: + """Run command with own globals/locals and returns anything printed.""" + old_stdout = sys.stdout + sys.stdout = mystdout = StringIO() + try: + exec(command, self.globals, self.locals) + sys.stdout = old_stdout + output = mystdout.getvalue() + except Exception as e: + sys.stdout = old_stdout + output = str(e) + return output diff --git a/langchain/requests.py b/langchain/requests.py new file mode 100644 index 0000000000000000000000000000000000000000..60efc634af7daa0f0cfbf40f6b07378d1e2b086b --- /dev/null +++ b/langchain/requests.py @@ -0,0 +1,73 @@ +"""Lightweight wrapper around requests library, with async support.""" +from typing import Any, Dict, Optional + +import aiohttp +import requests +from pydantic import BaseModel, Extra + + +class RequestsWrapper(BaseModel): + """Lightweight wrapper around requests library.""" + + headers: Optional[Dict[str, str]] = None + aiosession: Optional[aiohttp.ClientSession] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + def get(self, url: str) -> str: + """GET the URL and return the text.""" + return requests.get(url, headers=self.headers).text + + def post(self, url: str, data: Dict[str, Any]) -> str: + """POST to the URL and return the text.""" + return requests.post(url, json=data, headers=self.headers).text + + def patch(self, url: str, data: Dict[str, Any]) -> str: + """PATCH the URL and return the text.""" + return requests.patch(url, json=data, headers=self.headers).text + + def put(self, url: str, data: Dict[str, Any]) -> str: + """PUT the URL and return the text.""" + return requests.put(url, json=data, headers=self.headers).text + + def delete(self, url: str) -> str: + """DELETE the URL and return the text.""" + return requests.delete(url, headers=self.headers).text + + async def _arequest(self, method: str, url: str, **kwargs: Any) -> str: + """Make an async request.""" + if not self.aiosession: + async with aiohttp.ClientSession() as session: + async with session.request( + method, url, 
headers=self.headers, **kwargs + ) as response: + return await response.text() + else: + async with self.aiosession.request( + method, url, headers=self.headers, **kwargs + ) as response: + return await response.text() + + async def aget(self, url: str) -> str: + """GET the URL and return the text asynchronously.""" + return await self._arequest("GET", url) + + async def apost(self, url: str, data: Dict[str, Any]) -> str: + """POST to the URL and return the text asynchronously.""" + return await self._arequest("POST", url, json=data) + + async def apatch(self, url: str, data: Dict[str, Any]) -> str: + """PATCH the URL and return the text asynchronously.""" + return await self._arequest("PATCH", url, json=data) + + async def aput(self, url: str, data: Dict[str, Any]) -> str: + """PUT the URL and return the text asynchronously.""" + return await self._arequest("PUT", url, json=data) + + async def adelete(self, url: str) -> str: + """DELETE the URL and return the text asynchronously.""" + return await self._arequest("DELETE", url) diff --git a/langchain/schema.py b/langchain/schema.py new file mode 100644 index 0000000000000000000000000000000000000000..ec3240caefb2ff01cabd6d8da4de282d8a50dcba --- /dev/null +++ b/langchain/schema.py @@ -0,0 +1,338 @@ +"""Common schema objects.""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, NamedTuple, Optional + +from pydantic import BaseModel, Extra, Field, root_validator + + +def get_buffer_string( + messages: List[BaseMessage], human_prefix: str = "Human", ai_prefix: str = "AI" +) -> str: + """Get buffer string of messages.""" + string_messages = [] + for m in messages: + if isinstance(m, HumanMessage): + role = human_prefix + elif isinstance(m, AIMessage): + role = ai_prefix + elif isinstance(m, SystemMessage): + role = "System" + elif isinstance(m, ChatMessage): + role = m.role + else: + raise ValueError(f"Got unsupported message type: {m}") + string_messages.append(f"{role}: {m.content}") + return "\n".join(string_messages) + + +class AgentAction(NamedTuple): + """Agent's action to take.""" + + tool: str + tool_input: str + log: str + + +class AgentFinish(NamedTuple): + """Agent's return value.""" + + return_values: dict + log: str + + +class AgentClarify(NamedTuple): + """Agent's clarification request.""" + + question: str + log: str + + +class Generation(BaseModel): + """Output of a single generation.""" + + text: str + """Generated text output.""" + + generation_info: Optional[Dict[str, Any]] = None + """Raw generation info response from the provider""" + """May include things like reason for finishing (e.g. 
in OpenAI)""" + # TODO: add log probs + + +class BaseMessage(BaseModel): + """Message object.""" + + content: str + additional_kwargs: dict = Field(default_factory=dict) + + @property + @abstractmethod + def type(self) -> str: + """Type of the message, used for serialization.""" + + +class HumanMessage(BaseMessage): + """Type of message that is spoken by the human.""" + + @property + def type(self) -> str: + """Type of the message, used for serialization.""" + return "human" + + +class AIMessage(BaseMessage): + """Type of message that is spoken by the AI.""" + + @property + def type(self) -> str: + """Type of the message, used for serialization.""" + return "ai" + + +class SystemMessage(BaseMessage): + """Type of message that is a system message.""" + + @property + def type(self) -> str: + """Type of the message, used for serialization.""" + return "system" + + +class ChatMessage(BaseMessage): + """Type of message with arbitrary speaker.""" + + role: str + + @property + def type(self) -> str: + """Type of the message, used for serialization.""" + return "chat" + + +def _message_to_dict(message: BaseMessage) -> dict: + return {"type": message.type, "data": message.dict()} + + +def messages_to_dict(messages: List[BaseMessage]) -> List[dict]: + return [_message_to_dict(m) for m in messages] + + +def _message_from_dict(message: dict) -> BaseMessage: + _type = message["type"] + if _type == "human": + return HumanMessage(**message["data"]) + elif _type == "ai": + return AIMessage(**message["data"]) + elif _type == "system": + return SystemMessage(**message["data"]) + elif _type == "chat": + return ChatMessage(**message["data"]) + else: + raise ValueError(f"Got unexpected type: {_type}") + + +def messages_from_dict(messages: List[dict]) -> List[BaseMessage]: + return [_message_from_dict(m) for m in messages] + + +class ChatGeneration(Generation): + """Output of a single generation.""" + + text = "" + message: BaseMessage + + @root_validator + def set_text(cls, values: Dict[str, Any]) -> Dict[str, Any]: + values["text"] = values["message"].content + return values + + +class ChatResult(BaseModel): + """Class that contains all relevant information for a Chat Result.""" + + generations: List[ChatGeneration] + """List of the things generated.""" + llm_output: Optional[dict] = None + """For arbitrary LLM provider specific output.""" + + +class LLMResult(BaseModel): + """Class that contains all relevant information for an LLM Result.""" + + generations: List[List[Generation]] + """List of the things generated. This is List[List[]] because + each input could have multiple generations.""" + llm_output: Optional[dict] = None + """For arbitrary LLM provider specific output.""" + + +class PromptValue(BaseModel, ABC): + @abstractmethod + def to_string(self) -> str: + """Return prompt as string.""" + + @abstractmethod + def to_messages(self) -> List[BaseMessage]: + """Return prompt as messages.""" + + +class BaseLanguageModel(BaseModel, ABC): + @abstractmethod + def generate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + """Take in a list of prompt values and return an LLMResult.""" + + @abstractmethod + async def agenerate_prompt( + self, prompts: List[PromptValue], stop: Optional[List[str]] = None + ) -> LLMResult: + """Take in a list of prompt values and return an LLMResult.""" + + def get_num_tokens(self, text: str) -> int: + """Get the number of tokens present in the text.""" + # TODO: this method may not be exact. 
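        # (The default below approximates the count with the GPT-2 tokenizer from
        # `transformers`, which may not match the tokenizer a given provider uses.)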
+ # TODO: this method may differ based on model (eg codex). + try: + from transformers import GPT2TokenizerFast + except ImportError: + raise ValueError( + "Could not import transformers python package. " + "This is needed in order to calculate get_num_tokens. " + "Please it install it with `pip install transformers`." + ) + # create a GPT-3 tokenizer instance + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") + + # tokenize the text using the GPT-3 tokenizer + tokenized_text = tokenizer.tokenize(text) + + # calculate the number of tokens in the tokenized text + return len(tokenized_text) + + def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int: + """Get the number of tokens in the message.""" + return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages]) + + +class BaseMemory(BaseModel, ABC): + """Base interface for memory in chains.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + @abstractmethod + def memory_variables(self) -> List[str]: + """Input keys this memory class will load dynamically.""" + + @abstractmethod + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return key-value pairs given the text input to the chain. + + If None, return all memories + """ + + @abstractmethod + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save the context of this model run to memory.""" + + @abstractmethod + def clear(self) -> None: + """Clear memory contents.""" + + +class Document(BaseModel): + """Interface for interacting with a document.""" + + page_content: str + lookup_str: str = "" + lookup_index = 0 + metadata: dict = Field(default_factory=dict) + + @property + def paragraphs(self) -> List[str]: + """Paragraphs of the page.""" + return self.page_content.split("\n\n") + + @property + def summary(self) -> str: + """Summary of the page (the first paragraph).""" + return self.paragraphs[0] + + def lookup(self, string: str) -> str: + """Lookup a term in the page, imitating cmd-F functionality.""" + if string.lower() != self.lookup_str: + self.lookup_str = string.lower() + self.lookup_index = 0 + else: + self.lookup_index += 1 + lookups = [p for p in self.paragraphs if self.lookup_str in p.lower()] + if len(lookups) == 0: + return "No Results" + elif self.lookup_index >= len(lookups): + return "No More Results" + else: + result_prefix = f"(Result {self.lookup_index + 1}/{len(lookups)})" + return f"{result_prefix} {lookups[self.lookup_index]}" + + +class BaseRetriever(ABC): + @abstractmethod + def get_relevant_texts(self, query: str) -> List[Document]: + """Get texts relevant for a query. 
+ + Args: + query: string to find relevant tests for + + Returns: + List of relevant documents + """ + + +# For backwards compatibility + + +Memory = BaseMemory + + +class BaseOutputParser(BaseModel, ABC): + """Class to parse the output of an LLM call.""" + + @abstractmethod + def parse(self, text: str) -> Any: + """Parse the output of an LLM call.""" + + def parse_with_prompt(self, completion: str, prompt: PromptValue) -> Any: + return self.parse(completion) + + def get_format_instructions(self) -> str: + raise NotImplementedError + + @property + def _type(self) -> str: + """Return the type key.""" + raise NotImplementedError + + def dict(self, **kwargs: Any) -> Dict: + """Return dictionary representation of output parser.""" + output_parser_dict = super().dict() + output_parser_dict["_type"] = self._type + return output_parser_dict + + +class OutputParserException(Exception): + """Exception that output parsers should raise to signify a parsing error. + + This exists to differentiate parsing errors from other code or execution errors + that also may arise inside the output parser. OutputParserExceptions will be + available to catch and handle in ways to fix the parsing error, while other + errors will be raised. + """ + + pass diff --git a/langchain/serpapi.py b/langchain/serpapi.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8569b6b1d7cf41cbfd20e0d973da095b880eb5 --- /dev/null +++ b/langchain/serpapi.py @@ -0,0 +1,4 @@ +"""For backwards compatiblity.""" +from langchain.utilities.serpapi import SerpAPIWrapper + +__all__ = ["SerpAPIWrapper"] diff --git a/langchain/server.py b/langchain/server.py new file mode 100644 index 0000000000000000000000000000000000000000..4b00a478e54e7e8262afc1ad48db5869faac3933 --- /dev/null +++ b/langchain/server.py @@ -0,0 +1,14 @@ +"""Script to run langchain-server locally using docker-compose.""" +import subprocess +from pathlib import Path + + +def main() -> None: + """Run the langchain server locally.""" + p = Path(__file__).absolute().parent / "docker-compose.yaml" + subprocess.run(["docker-compose", "-f", str(p), "pull"]) + subprocess.run(["docker-compose", "-f", str(p), "up"]) + + +if __name__ == "__main__": + main() diff --git a/langchain/sql_database.py b/langchain/sql_database.py new file mode 100644 index 0000000000000000000000000000000000000000..19ced65ab2350bcc017ec2b9d105c301259d56b8 --- /dev/null +++ b/langchain/sql_database.py @@ -0,0 +1,217 @@ +"""SQLAlchemy wrapper around a database.""" +from __future__ import annotations + +from typing import Any, Iterable, List, Optional + +from sqlalchemy import MetaData, create_engine, inspect, select, text +from sqlalchemy.engine import Engine +from sqlalchemy.exc import ProgrammingError, SQLAlchemyError +from sqlalchemy.schema import CreateTable + + +class SQLDatabase: + """SQLAlchemy wrapper around a database.""" + + def __init__( + self, + engine: Engine, + schema: Optional[str] = None, + metadata: Optional[MetaData] = None, + ignore_tables: Optional[List[str]] = None, + include_tables: Optional[List[str]] = None, + sample_rows_in_table_info: int = 3, + custom_table_info: Optional[dict] = None, + ): + """Create engine from database URI.""" + self._engine = engine + self._schema = schema + if include_tables and ignore_tables: + raise ValueError("Cannot specify both include_tables and ignore_tables") + + self._inspector = inspect(self._engine) + self._all_tables = set(self._inspector.get_table_names(schema=schema)) + self._include_tables = set(include_tables) if include_tables 
else set() + if self._include_tables: + missing_tables = self._include_tables - self._all_tables + if missing_tables: + raise ValueError( + f"include_tables {missing_tables} not found in database" + ) + self._ignore_tables = set(ignore_tables) if ignore_tables else set() + if self._ignore_tables: + missing_tables = self._ignore_tables - self._all_tables + if missing_tables: + raise ValueError( + f"ignore_tables {missing_tables} not found in database" + ) + + if not isinstance(sample_rows_in_table_info, int): + raise TypeError("sample_rows_in_table_info must be an integer") + + self._sample_rows_in_table_info = sample_rows_in_table_info + + self._custom_table_info = custom_table_info + if self._custom_table_info: + if not isinstance(self._custom_table_info, dict): + raise TypeError( + "table_info must be a dictionary with table names as keys and the " + "desired table info as values" + ) + # only keep the tables that are also present in the database + intersection = set(self._custom_table_info).intersection(self._all_tables) + self._custom_table_info = dict( + (table, self._custom_table_info[table]) + for table in self._custom_table_info + if table in intersection + ) + + self._metadata = metadata or MetaData() + self._metadata.reflect(bind=self._engine) + + @classmethod + def from_uri(cls, database_uri: str, **kwargs: Any) -> SQLDatabase: + """Construct a SQLAlchemy engine from URI.""" + return cls(create_engine(database_uri), **kwargs) + + @property + def dialect(self) -> str: + """Return string representation of dialect to use.""" + return self._engine.dialect.name + + def get_table_names(self) -> Iterable[str]: + """Get names of tables available.""" + if self._include_tables: + return self._include_tables + return self._all_tables - self._ignore_tables + + @property + def table_info(self) -> str: + """Information about all tables in the database.""" + return self.get_table_info() + + def get_table_info(self, table_names: Optional[List[str]] = None) -> str: + """Get information about specified tables. + + Follows best practices as specified in: Rajkumar et al, 2022 + (https://arxiv.org/abs/2204.00498) + + If `sample_rows_in_table_info`, the specified number of sample rows will be + appended to each table description. This can increase performance as + demonstrated in the paper. 
+ """ + all_table_names = self.get_table_names() + if table_names is not None: + missing_tables = set(table_names).difference(all_table_names) + if missing_tables: + raise ValueError(f"table_names {missing_tables} not found in database") + all_table_names = table_names + + meta_tables = [ + tbl + for tbl in self._metadata.sorted_tables + if tbl.name in set(all_table_names) + and not (self.dialect == "sqlite" and tbl.name.startswith("sqlite_")) + ] + + tables = [] + for table in meta_tables: + if self._custom_table_info and table.name in self._custom_table_info: + tables.append(self._custom_table_info[table.name]) + continue + + # add create table command + create_table = str(CreateTable(table).compile(self._engine)) + + if self._sample_rows_in_table_info: + # build the select command + command = select(table).limit(self._sample_rows_in_table_info) + + # save the columns in string format + columns_str = "\t".join([col.name for col in table.columns]) + + try: + # get the sample rows + with self._engine.connect() as connection: + sample_rows = connection.execute(command) + # shorten values in the sample rows + sample_rows = list( + map(lambda ls: [str(i)[:100] for i in ls], sample_rows) + ) + + # save the sample rows in string format + sample_rows_str = "\n".join(["\t".join(row) for row in sample_rows]) + + # in some dialects when there are no rows in the table a + # 'ProgrammingError' is returned + except ProgrammingError: + sample_rows_str = "" + + table_info = ( + f"{create_table.rstrip()}\n" + f"/*\n" + f"{self._sample_rows_in_table_info} rows from {table.name} table:\n" + f"{columns_str}\n" + f"{sample_rows_str}\n" + f"*/" + ) + + # build final info for table + tables.append(table_info) + + else: + tables.append(create_table) + + final_str = "\n\n".join(tables) + return final_str + + def run(self, command: str, fetch: str = "all") -> str: + """Execute a SQL command and return a string representing the results. + + If the statement returns rows, a string of the results is returned. + If the statement returns no rows, an empty string is returned. + """ + with self._engine.begin() as connection: + if self._schema is not None: + connection.exec_driver_sql(f"SET search_path TO {self._schema}") + cursor = connection.execute(text(command)) + if cursor.returns_rows: + if fetch == "all": + data = cursor.fetchall() + column_names = [desc.name for desc in cursor.context.cursor.description] + result = {"column_names": column_names, "data": data} + elif fetch == "one": + result = cursor.fetchone()[0] + else: + raise ValueError("Fetch parameter must be either 'one' or 'all'") + return str(result) + return "" + + def get_table_info_no_throw(self, table_names: Optional[List[str]] = None) -> str: + """Get information about specified tables. + + Follows best practices as specified in: Rajkumar et al, 2022 + (https://arxiv.org/abs/2204.00498) + + If `sample_rows_in_table_info`, the specified number of sample rows will be + appended to each table description. This can increase performance as + demonstrated in the paper. + """ + try: + return self.get_table_info(table_names) + except ValueError as e: + """Format the error message""" + return f"Error: {e}" + + def run_no_throw(self, command: str, fetch: str = "all") -> str: + """Execute a SQL command and return a string representing the results. + + If the statement returns rows, a string of the results is returned. + If the statement returns no rows, an empty string is returned. + + If the statement throws an error, the error message is returned. 
+ """ + try: + return self.run(command, fetch) + except SQLAlchemyError as e: + """Format the error message""" + return f"Error: {e}" diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py new file mode 100644 index 0000000000000000000000000000000000000000..9da309a317f555059cba6ea2b2d5d1e1510da421 --- /dev/null +++ b/langchain/text_splitter.py @@ -0,0 +1,405 @@ +"""Functionality for splitting text.""" +from __future__ import annotations + +import copy +import logging +from abc import ABC, abstractmethod +from typing import ( + AbstractSet, + Any, + Callable, + Collection, + Iterable, + List, + Literal, + Optional, + Union, +) + +from langchain.docstore.document import Document + +logger = logging.getLogger() + + +class TextSplitter(ABC): + """Interface for splitting text into chunks.""" + + def __init__( + self, + chunk_size: int = 4000, + chunk_overlap: int = 200, + length_function: Callable[[str], int] = len, + ): + """Create a new TextSplitter.""" + if chunk_overlap > chunk_size: + raise ValueError( + f"Got a larger chunk overlap ({chunk_overlap}) than chunk size " + f"({chunk_size}), should be smaller." + ) + self._chunk_size = chunk_size + self._chunk_overlap = chunk_overlap + self._length_function = length_function + + @abstractmethod + def split_text(self, text: str) -> List[str]: + """Split text into multiple components.""" + + def create_documents( + self, texts: List[str], metadatas: Optional[List[dict]] = None + ) -> List[Document]: + """Create documents from a list of texts.""" + _metadatas = metadatas or [{}] * len(texts) + documents = [] + for i, text in enumerate(texts): + for chunk in self.split_text(text): + new_doc = Document( + page_content=chunk, metadata=copy.deepcopy(_metadatas[i]) + ) + documents.append(new_doc) + return documents + + def split_documents(self, documents: List[Document]) -> List[Document]: + """Split documents.""" + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return self.create_documents(texts, metadatas) + + def _join_docs(self, docs: List[str], separator: str) -> Optional[str]: + text = separator.join(docs) + text = text.strip() + if text == "": + return None + else: + return text + + def _merge_splits(self, splits: Iterable[str], separator: str) -> List[str]: + # We now want to combine these smaller pieces into medium size + # chunks to send to the LLM. 
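To make the chunking parameters above concrete, here is a minimal sketch using the `CharacterTextSplitter` subclass defined further down in this module; the text and metadata are invented, and `chunk_overlap` must not exceed `chunk_size` or `__init__` raises.

```python
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(separator="\n\n", chunk_size=50, chunk_overlap=10)

texts = ["First paragraph.\n\nSecond paragraph, somewhat longer than the first."]
docs = splitter.create_documents(texts, metadatas=[{"source": "example.txt"}])

for doc in docs:
    # Each chunk carries a deep copy of the metadata of the text it came from.
    print(doc.metadata["source"], "->", doc.page_content)
```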
+ separator_len = self._length_function(separator) + + docs = [] + current_doc: List[str] = [] + total = 0 + for d in splits: + _len = self._length_function(d) + if ( + total + _len + (separator_len if len(current_doc) > 0 else 0) + > self._chunk_size + ): + if total > self._chunk_size: + logger.warning( + f"Created a chunk of size {total}, " + f"which is longer than the specified {self._chunk_size}" + ) + if len(current_doc) > 0: + doc = self._join_docs(current_doc, separator) + if doc is not None: + docs.append(doc) + # Keep on popping if: + # - we have a larger chunk than in the chunk overlap + # - or if we still have any chunks and the length is long + while total > self._chunk_overlap or ( + total + _len + (separator_len if len(current_doc) > 0 else 0) + > self._chunk_size + and total > 0 + ): + total -= self._length_function(current_doc[0]) + ( + separator_len if len(current_doc) > 1 else 0 + ) + current_doc = current_doc[1:] + current_doc.append(d) + total += _len + (separator_len if len(current_doc) > 1 else 0) + doc = self._join_docs(current_doc, separator) + if doc is not None: + docs.append(doc) + return docs + + @classmethod + def from_huggingface_tokenizer(cls, tokenizer: Any, **kwargs: Any) -> TextSplitter: + """Text splitter that uses HuggingFace tokenizer to count length.""" + try: + from transformers import PreTrainedTokenizerBase + + if not isinstance(tokenizer, PreTrainedTokenizerBase): + raise ValueError( + "Tokenizer received was not an instance of PreTrainedTokenizerBase" + ) + + def _huggingface_tokenizer_length(text: str) -> int: + return len(tokenizer.encode(text)) + + except ImportError: + raise ValueError( + "Could not import transformers python package. " + "Please it install it with `pip install transformers`." + ) + return cls(length_function=_huggingface_tokenizer_length, **kwargs) + + @classmethod + def from_tiktoken_encoder( + cls, + encoding_name: str = "gpt2", + allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), + disallowed_special: Union[Literal["all"], Collection[str]] = "all", + **kwargs: Any, + ) -> TextSplitter: + """Text splitter that uses tiktoken encoder to count length.""" + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to calculate max_tokens_for_prompt. " + "Please it install it with `pip install tiktoken`." + ) + + # create a GPT-3 encoder instance + enc = tiktoken.get_encoding(encoding_name) + + def _tiktoken_encoder(text: str, **kwargs: Any) -> int: + return len( + enc.encode( + text, + allowed_special=allowed_special, + disallowed_special=disallowed_special, + **kwargs, + ) + ) + + return cls(length_function=_tiktoken_encoder, **kwargs) + + +class CharacterTextSplitter(TextSplitter): + """Implementation of splitting text that looks at characters.""" + + def __init__(self, separator: str = "\n\n", **kwargs: Any): + """Create a new TextSplitter.""" + super().__init__(**kwargs) + self._separator = separator + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + # First we naively split the large input into a bunch of smaller ones. 
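A hedged sketch of the token-counting helpers above, assuming the optional `tiktoken` dependency is installed: `from_tiktoken_encoder` swaps the default character-based `length_function` for a GPT-2 token count, so chunk budgets are expressed in tokens.

```python
from langchain.text_splitter import CharacterTextSplitter

# Budget chunks by GPT-2 tokens rather than characters (requires `pip install tiktoken`).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="gpt2",
    chunk_size=128,
    chunk_overlap=16,
)

text = "LangChain composes LLM calls with other tools.\n\n" * 40
chunks = splitter.split_text(text)

# Paragraphs are merged until adding another would exceed the ~128-token budget.
print(len(chunks))
```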
+ if self._separator: + splits = text.split(self._separator) + else: + splits = list(text) + return self._merge_splits(splits, self._separator) + + +class TokenTextSplitter(TextSplitter): + """Implementation of splitting text that looks at tokens.""" + + def __init__( + self, + encoding_name: str = "gpt2", + allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), + disallowed_special: Union[Literal["all"], Collection[str]] = "all", + **kwargs: Any, + ): + """Create a new TextSplitter.""" + super().__init__(**kwargs) + try: + import tiktoken + except ImportError: + raise ValueError( + "Could not import tiktoken python package. " + "This is needed in order to for TokenTextSplitter. " + "Please it install it with `pip install tiktoken`." + ) + # create a GPT-3 encoder instance + self._tokenizer = tiktoken.get_encoding(encoding_name) + self._allowed_special = allowed_special + self._disallowed_special = disallowed_special + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + splits = [] + input_ids = self._tokenizer.encode( + text, + allowed_special=self._allowed_special, + disallowed_special=self._disallowed_special, + ) + start_idx = 0 + cur_idx = min(start_idx + self._chunk_size, len(input_ids)) + chunk_ids = input_ids[start_idx:cur_idx] + while start_idx < len(input_ids): + splits.append(self._tokenizer.decode(chunk_ids)) + start_idx += self._chunk_size - self._chunk_overlap + cur_idx = min(start_idx + self._chunk_size, len(input_ids)) + chunk_ids = input_ids[start_idx:cur_idx] + return splits + + +class RecursiveCharacterTextSplitter(TextSplitter): + """Implementation of splitting text that looks at characters. + + Recursively tries to split by different characters to find one + that works. + """ + + def __init__(self, separators: Optional[List[str]] = None, **kwargs: Any): + """Create a new TextSplitter.""" + super().__init__(**kwargs) + self._separators = separators or ["\n\n", "\n", " ", ""] + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + final_chunks = [] + # Get appropriate separator to use + separator = self._separators[-1] + for _s in self._separators: + if _s == "": + separator = _s + break + if _s in text: + separator = _s + break + # Now that we have the separator, split the text + if separator: + splits = text.split(separator) + else: + splits = list(text) + # Now go merging things, recursively splitting longer texts. + _good_splits = [] + for s in splits: + if self._length_function(s) < self._chunk_size: + _good_splits.append(s) + else: + if _good_splits: + merged_text = self._merge_splits(_good_splits, separator) + final_chunks.extend(merged_text) + _good_splits = [] + other_info = self.split_text(s) + final_chunks.extend(other_info) + if _good_splits: + merged_text = self._merge_splits(_good_splits, separator) + final_chunks.extend(merged_text) + return final_chunks + + +class NLTKTextSplitter(TextSplitter): + """Implementation of splitting text that looks at sentences using NLTK.""" + + def __init__(self, separator: str = "\n\n", **kwargs: Any): + """Initialize the NLTK splitter.""" + super().__init__(**kwargs) + try: + from nltk.tokenize import sent_tokenize + + self._tokenizer = sent_tokenize + except ImportError: + raise ImportError( + "NLTK is not installed, please install it with `pip install nltk`." 
+ ) + self._separator = separator + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + # First we naively split the large input into a bunch of smaller ones. + splits = self._tokenizer(text) + return self._merge_splits(splits, self._separator) + + +class SpacyTextSplitter(TextSplitter): + """Implementation of splitting text that looks at sentences using Spacy.""" + + def __init__( + self, separator: str = "\n\n", pipeline: str = "en_core_web_sm", **kwargs: Any + ): + """Initialize the spacy text splitter.""" + super().__init__(**kwargs) + try: + import spacy + except ImportError: + raise ImportError( + "Spacy is not installed, please install it with `pip install spacy`." + ) + self._tokenizer = spacy.load(pipeline) + self._separator = separator + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + splits = (str(s) for s in self._tokenizer(text).sents) + return self._merge_splits(splits, self._separator) + + +class MarkdownTextSplitter(RecursiveCharacterTextSplitter): + """Attempts to split the text along Markdown-formatted headings.""" + + def __init__(self, **kwargs: Any): + """Initialize a MarkdownTextSplitter.""" + separators = [ + # First, try to split along Markdown headings (starting with level 2) + "\n## ", + "\n### ", + "\n#### ", + "\n##### ", + "\n###### ", + # Note the alternative syntax for headings (below) is not handled here + # Heading level 2 + # --------------- + # End of code block + "```\n\n", + # Horizontal lines + "\n\n***\n\n", + "\n\n---\n\n", + "\n\n___\n\n", + # Note that this splitter doesn't handle horizontal lines defined + # by *three or more* of ***, ---, or ___, but this is not handled + "\n\n", + "\n", + " ", + "", + ] + super().__init__(separators=separators, **kwargs) + + +class LatexTextSplitter(RecursiveCharacterTextSplitter): + """Attempts to split the text along Latex-formatted layout elements.""" + + def __init__(self, **kwargs: Any): + """Initialize a LatexTextSplitter.""" + separators = [ + # First, try to split along Latex sections + "\n\\chapter{", + "\n\\section{", + "\n\\subsection{", + "\n\\subsubsection{", + # Now split by environments + "\n\\begin{enumerate}", + "\n\\begin{itemize}", + "\n\\begin{description}", + "\n\\begin{list}", + "\n\\begin{quote}", + "\n\\begin{quotation}", + "\n\\begin{verse}", + "\n\\begin{verbatim}", + ## Now split by math environments + "\n\\begin{align}", + "$$", + "$", + # Now split by the normal type of lines + " ", + "", + ] + super().__init__(separators=separators, **kwargs) + + +class PythonCodeTextSplitter(RecursiveCharacterTextSplitter): + """Attempts to split the text along Python syntax.""" + + def __init__(self, **kwargs: Any): + """Initialize a MarkdownTextSplitter.""" + separators = [ + # First, try to split along class definitions + "\nclass ", + "\ndef ", + "\n\tdef ", + # Now split by the normal type of lines + "\n\n", + "\n", + " ", + "", + ] + super().__init__(separators=separators, **kwargs) diff --git a/langchain/tools/__init__.py b/langchain/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fcacd361367f79601e0ed021d9e02c55f838805e --- /dev/null +++ b/langchain/tools/__init__.py @@ -0,0 +1,6 @@ +"""Core toolkit implementations.""" + +from langchain.tools.base import BaseTool +from langchain.tools.ifttt import IFTTTWebhook + +__all__ = ["BaseTool", "IFTTTWebhook"] diff --git a/langchain/tools/__pycache__/__init__.cpython-39.pyc 
b/langchain/tools/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a40570ed262912c3dcb468cc9bf0ee43f297b4c Binary files /dev/null and b/langchain/tools/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/__pycache__/base.cpython-39.pyc b/langchain/tools/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b744f374fffbfa7c3c3b61e0c1dfcb8edb3aa77 Binary files /dev/null and b/langchain/tools/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/tools/__pycache__/ifttt.cpython-39.pyc b/langchain/tools/__pycache__/ifttt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..673fdf5342c23fb075db79cd83cafa82446831d7 Binary files /dev/null and b/langchain/tools/__pycache__/ifttt.cpython-39.pyc differ diff --git a/langchain/tools/base.py b/langchain/tools/base.py new file mode 100644 index 0000000000000000000000000000000000000000..9a40cde1c872daab8e4dc0b09ae3a6624995274b --- /dev/null +++ b/langchain/tools/base.py @@ -0,0 +1,121 @@ +"""Base implementation for tools or skills.""" + +from abc import abstractmethod +from typing import Any, Optional + +from pydantic import BaseModel, Extra, Field, validator + +from langchain.callbacks import get_callback_manager +from langchain.callbacks.base import BaseCallbackManager + + +class BaseTool(BaseModel): + """Class responsible for defining a tool or skill for an LLM.""" + + name: str + description: str + return_direct: bool = False + verbose: bool = False + callback_manager: BaseCallbackManager = Field(default_factory=get_callback_manager) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @validator("callback_manager", pre=True, always=True) + def set_callback_manager( + cls, callback_manager: Optional[BaseCallbackManager] + ) -> BaseCallbackManager: + """If callback manager is None, set it. + + This allows users to pass in None as callback manager, which is a nice UX. 
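To show how the base class above is meant to be subclassed, here is an illustrative custom tool; the name, description, and reversing behaviour are invented for the example, and the `run()`/`__call__` plumbing defined just below wraps `_run` with the callback-manager hooks.

```python
from langchain.tools.base import BaseTool


class ReverseTool(BaseTool):
    """Toy tool that reverses its input string."""

    name = "reverse"
    description = "Reverses the input string. Input should be any text."

    def _run(self, tool_input: str) -> str:
        return tool_input[::-1]

    async def _arun(self, tool_input: str) -> str:
        raise NotImplementedError("ReverseTool does not support async")


tool = ReverseTool()
# __call__ delegates to run(), which fires on_tool_start/on_tool_end around _run().
print(tool("hello world"))  # -> "dlrow olleh"
```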
+ """ + return callback_manager or get_callback_manager() + + @abstractmethod + def _run(self, tool_input: str) -> str: + """Use the tool.""" + + @abstractmethod + async def _arun(self, tool_input: str) -> str: + """Use the tool asynchronously.""" + + def __call__(self, tool_input: str) -> str: + """Make tools callable with str input.""" + return self.run(tool_input) + + def run( + self, + tool_input: str, + verbose: Optional[bool] = None, + start_color: Optional[str] = "green", + color: Optional[str] = "green", + **kwargs: Any + ) -> str: + """Run the tool.""" + if verbose is None: + verbose = self.verbose + self.callback_manager.on_tool_start( + {"name": self.name, "description": self.description}, + tool_input, + verbose=verbose, + color=start_color, + **kwargs, + ) + try: + observation = self._run(tool_input) + except (Exception, KeyboardInterrupt) as e: + self.callback_manager.on_tool_error(e, verbose=verbose) + raise e + self.callback_manager.on_tool_end( + observation, verbose=verbose, color=color, **kwargs + ) + return observation + + async def arun( + self, + tool_input: str, + verbose: Optional[bool] = None, + start_color: Optional[str] = "green", + color: Optional[str] = "green", + **kwargs: Any + ) -> str: + """Run the tool asynchronously.""" + if verbose is None: + verbose = self.verbose + if self.callback_manager.is_async: + await self.callback_manager.on_tool_start( + {"name": self.name, "description": self.description}, + tool_input, + verbose=verbose, + color=start_color, + **kwargs, + ) + else: + self.callback_manager.on_tool_start( + {"name": self.name, "description": self.description}, + tool_input, + verbose=verbose, + color=start_color, + **kwargs, + ) + try: + # We then call the tool on the tool input to get an observation + observation = await self._arun(tool_input) + except (Exception, KeyboardInterrupt) as e: + if self.callback_manager.is_async: + await self.callback_manager.on_tool_error(e, verbose=verbose) + else: + self.callback_manager.on_tool_error(e, verbose=verbose) + raise e + if self.callback_manager.is_async: + await self.callback_manager.on_tool_end( + observation, verbose=verbose, color=color, **kwargs + ) + else: + self.callback_manager.on_tool_end( + observation, verbose=verbose, color=color, **kwargs + ) + return observation diff --git a/langchain/tools/bing_search/__init__.py b/langchain/tools/bing_search/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a993a0668ef6387792b5a8b94c9c9c5c4e9e89a3 --- /dev/null +++ b/langchain/tools/bing_search/__init__.py @@ -0,0 +1 @@ +"""Bing Search API toolkit.""" diff --git a/langchain/tools/bing_search/__pycache__/__init__.cpython-39.pyc b/langchain/tools/bing_search/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..befb43eb65db11fa169fa65d02b80124bac58614 Binary files /dev/null and b/langchain/tools/bing_search/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/bing_search/__pycache__/tool.cpython-39.pyc b/langchain/tools/bing_search/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c9d97fde827a3144f528fd2b982cda9d8a7ea9a Binary files /dev/null and b/langchain/tools/bing_search/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/bing_search/tool.py b/langchain/tools/bing_search/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..c851c3ee845c1dd8dfe41afed4caef6cbb0c6999 --- /dev/null +++ 
b/langchain/tools/bing_search/tool.py @@ -0,0 +1,24 @@ +"""Tool for the Bing search API.""" + +from langchain.tools.base import BaseTool +from langchain.utilities.bing_search import BingSearchAPIWrapper + + +class BingSearchRun(BaseTool): + """Tool that adds the capability to query the Bing search API.""" + + name = "Bing Search" + description = ( + "A wrapper around Bing Search. " + "Useful for when you need to answer questions about current events. " + "Input should be a search query." + ) + api_wrapper: BingSearchAPIWrapper + + def _run(self, query: str) -> str: + """Use the tool.""" + return self.api_wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("BingSearchRun does not support async") diff --git a/langchain/tools/google_search/__init__.py b/langchain/tools/google_search/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1be19422b2cb519b55b727957a51557b73048839 --- /dev/null +++ b/langchain/tools/google_search/__init__.py @@ -0,0 +1 @@ +"""Google Search API Toolkit.""" diff --git a/langchain/tools/google_search/__pycache__/__init__.cpython-39.pyc b/langchain/tools/google_search/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5ca877265d03b711e09da31c13e62ddc5329130 Binary files /dev/null and b/langchain/tools/google_search/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/google_search/__pycache__/tool.cpython-39.pyc b/langchain/tools/google_search/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bad6f6f50c64dae698293a686ad62cedb0059f9d Binary files /dev/null and b/langchain/tools/google_search/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/google_search/tool.py b/langchain/tools/google_search/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..1945a3df8b009caf5dda86969f9d4da7ba9b614f --- /dev/null +++ b/langchain/tools/google_search/tool.py @@ -0,0 +1,45 @@ +"""Tool for the Google search API.""" + +from langchain.tools.base import BaseTool +from langchain.utilities.google_search import GoogleSearchAPIWrapper + + +class GoogleSearchRun(BaseTool): + """Tool that adds the capability to query the Google search API.""" + + name = "Google Search" + description = ( + "A wrapper around Google Search. " + "Useful for when you need to answer questions about current events. " + "Input should be a search query." + ) + api_wrapper: GoogleSearchAPIWrapper + + def _run(self, query: str) -> str: + """Use the tool.""" + return self.api_wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("GoogleSearchRun does not support async") + + +class GoogleSearchResults(BaseTool): + """Tool that has capability to query the Google Search API and get back json.""" + + name = "Google Search Results JSON" + description = ( + "A wrapper around Google Search. " + "Useful for when you need to answer questions about current events. " + "Input should be a search query. 
Output is a JSON array of the query results" + ) + num_results: int = 4 + api_wrapper: GoogleSearchAPIWrapper + + def _run(self, query: str) -> str: + """Use the tool.""" + return str(self.api_wrapper.results(query, self.num_results)) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("GoogleSearchRun does not support async") diff --git a/langchain/tools/human/__init__.py b/langchain/tools/human/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..704a29da0b408a3c3ccf175d8a2a60f15bb436a1 --- /dev/null +++ b/langchain/tools/human/__init__.py @@ -0,0 +1 @@ +"""Tool for asking for human input.""" diff --git a/langchain/tools/human/__pycache__/__init__.cpython-39.pyc b/langchain/tools/human/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdf6438541b44a5fd88882ad4c7ca429eeb51be7 Binary files /dev/null and b/langchain/tools/human/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/human/__pycache__/tool.cpython-39.pyc b/langchain/tools/human/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..115b6017238c87862f216cd148b247689e5e20ad Binary files /dev/null and b/langchain/tools/human/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/human/tool.py b/langchain/tools/human/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..de2cce81cfa0a183cf8433f636a313f5d99bc093 --- /dev/null +++ b/langchain/tools/human/tool.py @@ -0,0 +1,34 @@ +"""Tool for asking human input.""" + +from typing import Callable + +from pydantic import Field + +from langchain.tools.base import BaseTool + + +def _print_func(text: str) -> None: + print("\n") + print(text) + + +class HumanInputRun(BaseTool): + """Tool that adds the capability to ask user for input.""" + + name = "Human" + description = ( + "You can ask a human for guidance when you think you " + "got stuck or you are not sure what to do next. " + "The input should be a question for the human." + ) + prompt_func: Callable[[str], None] = Field(default_factory=lambda: _print_func) + input_func: Callable = Field(default_factory=lambda: input) + + def _run(self, query: str) -> str: + """Use the Human input tool.""" + self.prompt_func(query) + return self.input_func() + + async def _arun(self, query: str) -> str: + """Use the Human tool asynchronously.""" + raise NotImplementedError("Human tool does not support async") diff --git a/langchain/tools/ifttt.py b/langchain/tools/ifttt.py new file mode 100644 index 0000000000000000000000000000000000000000..8d3d943af0c59ed32cec471afd5bcb0ec75fd64a --- /dev/null +++ b/langchain/tools/ifttt.py @@ -0,0 +1,57 @@ +"""From https://github.com/SidU/teams-langchain-js/wiki/Connecting-IFTTT-Services. + +# Creating a webhook +- Go to https://ifttt.com/create + +# Configuring the "If This" +- Click on the "If This" button in the IFTTT interface. +- Search for "Webhooks" in the search bar. +- Choose the first option for "Receive a web request with a JSON payload." +- Choose an Event Name that is specific to the service you plan to connect to. +This will make it easier for you to manage the webhook URL. +For example, if you're connecting to Spotify, you could use "Spotify" as your +Event Name. +- Click the "Create Trigger" button to save your settings and create your webhook. + +# Configuring the "Then That" +- Tap on the "Then That" button in the IFTTT interface. 
+- Search for the service you want to connect, such as Spotify. +- Choose an action from the service, such as "Add track to a playlist". +- Configure the action by specifying the necessary details, such as the playlist name, +e.g., "Songs from AI". +- Reference the JSON Payload received by the Webhook in your action. For the Spotify +scenario, choose "{{JsonPayload}}" as your search query. +- Tap the "Create Action" button to save your action settings. +- Once you have finished configuring your action, click the "Finish" button to +complete the setup. +- Congratulations! You have successfully connected the Webhook to the desired +service, and you're ready to start receiving data and triggering actions 🎉 + +# Finishing up +- To get your webhook URL go to https://ifttt.com/maker_webhooks/settings +- Copy the IFTTT key value from there. The URL is of the form +https://maker.ifttt.com/use/YOUR_IFTTT_KEY. Grab the YOUR_IFTTT_KEY value. +""" +import requests + +from langchain.tools.base import BaseTool + + +class IFTTTWebhook(BaseTool): + """IFTTT Webhook. + + Args: + name: name of the tool + description: description of the tool + url: url to hit with the json event. + """ + + url: str + + def _run(self, tool_input: str) -> str: + body = {"this": tool_input} + response = requests.post(self.url, data=body) + return response.text + + async def _arun(self, tool_input: str) -> str: + raise NotImplementedError("Not implemented.") diff --git a/langchain/tools/interaction/__init__.py b/langchain/tools/interaction/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be3393362d8bc39d0f5d56dffd757fb51ae6ace2 --- /dev/null +++ b/langchain/tools/interaction/__init__.py @@ -0,0 +1 @@ +"""Tools for interacting with the user.""" diff --git a/langchain/tools/interaction/tool.py b/langchain/tools/interaction/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..ee2b51ca4cac743198a7d079418f4706bace7ef9 --- /dev/null +++ b/langchain/tools/interaction/tool.py @@ -0,0 +1,22 @@ +"""Tools for interacting with the user.""" + + +from langchain.tools.base import BaseTool + + +class StdInInquireTool(BaseTool): + """Tool for asking the user for input.""" + + name: str = "Inquire" + description: str = ( + "useful if you do not have enough information to" + " effectively use other tools. Input is best as a clarifying" + " question (to disambiguate) or a request for more context." 
+ ) + + def _run(self, prompt: str) -> str: + """Prompt the user for more input.""" + return input(f"\n{prompt}") + + async def _arun(self, query: str) -> str: + raise NotImplementedError(f"{self.__class__.__name__} does not support async") diff --git a/langchain/tools/json/__init__.py b/langchain/tools/json/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d13302f008a09fa9747da055d4baeec957651ef4 --- /dev/null +++ b/langchain/tools/json/__init__.py @@ -0,0 +1 @@ +"""Tools for interacting with a JSON file.""" diff --git a/langchain/tools/json/__pycache__/__init__.cpython-39.pyc b/langchain/tools/json/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae61af7a14eb931f63c74901300d31082eec0271 Binary files /dev/null and b/langchain/tools/json/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/json/__pycache__/tool.cpython-39.pyc b/langchain/tools/json/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c866b1d0cb0b05a2d07f0eb2a1f1eec1e4c4200 Binary files /dev/null and b/langchain/tools/json/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/json/tool.py b/langchain/tools/json/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..9f1bdac737cb035d0ab6f1096d4743ebd804cc55 --- /dev/null +++ b/langchain/tools/json/tool.py @@ -0,0 +1,113 @@ +# flake8: noqa +"""Tools for working with JSON specs.""" +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Dict, List, Union + +from pydantic import BaseModel + +from langchain.tools.base import BaseTool + + +def _parse_input(text: str) -> List[Union[str, int]]: + """Parse input of the form data["key1"][0]["key2"] into a list of keys.""" + _res = re.findall(r"\[.*?]", text) + # strip the brackets and quotes, convert to int if possible + res = [i[1:-1].replace('"', "") for i in _res] + res = [int(i) if i.isdigit() else i for i in res] + return res + + +class JsonSpec(BaseModel): + """Base class for JSON spec.""" + + dict_: Dict + max_value_length: int = 200 + + @classmethod + def from_file(cls, path: Path) -> JsonSpec: + """Create a JsonSpec from a file.""" + if not path.exists(): + raise FileNotFoundError(f"File not found: {path}") + dict_ = json.loads(path.read_text()) + return cls(dict_=dict_) + + def keys(self, text: str) -> str: + """Return the keys of the dict at the given path. + + Args: + text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). + """ + try: + items = _parse_input(text) + val = self.dict_ + for i in items: + if i: + val = val[i] + if not isinstance(val, dict): + raise ValueError( + f"Value at path `{text}` is not a dict, get the value directly." + ) + return str(list(val.keys())) + except Exception as e: + return repr(e) + + def value(self, text: str) -> str: + """Return the value of the dict at the given path. + + Args: + text: Python representation of the path to the dict (e.g. data["key1"][0]["key2"]). + """ + try: + items = _parse_input(text) + val = self.dict_ + for i in items: + val = val[i] + + if isinstance(val, dict) and len(str(val)) > self.max_value_length: + return "Value is a large dictionary, should explore its keys directly" + str_val = str(val) + if len(str_val) > self.max_value_length: + str_val = str_val[: self.max_value_length] + "..." 
+ return str_val + except Exception as e: + return repr(e) + + +class JsonListKeysTool(BaseTool): + """Tool for listing keys in a JSON spec.""" + + name = "json_spec_list_keys" + description = """ + Can be used to list all keys at a given path. + Before calling this you should be SURE that the path to this exists. + The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). + """ + spec: JsonSpec + + def _run(self, tool_input: str) -> str: + return self.spec.keys(tool_input) + + async def _arun(self, tool_input: str) -> str: + return self._run(tool_input) + + +class JsonGetValueTool(BaseTool): + """Tool for getting a value in a JSON spec.""" + + name = "json_spec_get_value" + description = """ + Can be used to see value in string format at a given path. + Before calling this you should be SURE that the path to this exists. + The input is a text representation of the path to the dict in Python syntax (e.g. data["key1"][0]["key2"]). + """ + spec: JsonSpec + + def _run(self, tool_input: str) -> str: + return self.spec.value(tool_input) + + async def _arun(self, tool_input: str) -> str: + return self._run(tool_input) diff --git a/langchain/tools/python/__init__.py b/langchain/tools/python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/langchain/tools/python/__pycache__/__init__.cpython-39.pyc b/langchain/tools/python/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9517dfbc8b59ab1a4e15a609f0f1ae27a4058847 Binary files /dev/null and b/langchain/tools/python/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/python/__pycache__/tool.cpython-39.pyc b/langchain/tools/python/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea8dcce95388e70e6a0d8ade152774606cc24b09 Binary files /dev/null and b/langchain/tools/python/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/python/tool.py b/langchain/tools/python/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..0a1adb46286af4d4b7578db84028b4969b138296 --- /dev/null +++ b/langchain/tools/python/tool.py @@ -0,0 +1,80 @@ +"""A tool for running python code in a REPL.""" + +import ast +import sys +from typing import Dict, Optional + +from pydantic import Field, root_validator + +from langchain.python import PythonREPL +from langchain.tools.base import BaseTool + + +def _get_default_python_repl() -> PythonREPL: + return PythonREPL(_globals=globals(), _locals=None) + + +class PythonREPLTool(BaseTool): + """A tool for running python code in a REPL.""" + + name = "Python REPL" + description = ( + "A Python shell. Use this to execute python commands. " + "Input should be a valid python command. " + "If you want to see the output of a value, you should print it out " + "with `print(...)`." + ) + python_repl: PythonREPL = Field(default_factory=_get_default_python_repl) + + def _run(self, query: str) -> str: + """Use the tool.""" + return self.python_repl.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("PythonReplTool does not support async") + + +class PythonAstREPLTool(BaseTool): + """A tool for running python code in a REPL.""" + + name = "python_repl_ast" + description = ( + "A Python shell. Use this to execute python commands. " + "Input should be a valid python command. 
" + "When using this tool, sometimes output is abbreviated - " + "make sure it does not look abbreviated before using it in your answer." + ) + globals: Optional[Dict] = Field(default_factory=dict) + locals: Optional[Dict] = Field(default_factory=dict) + + @root_validator(pre=True) + def validate_python_version(cls, values: Dict) -> Dict: + """Validate valid python version.""" + if sys.version_info < (3, 9): + raise ValueError( + "This tool relies on Python 3.9 or higher " + "(as it uses new functionality in the `ast` module, " + f"you have Python version: {sys.version}" + ) + return values + + def _run(self, query: str) -> str: + """Use the tool.""" + try: + tree = ast.parse(query) + module = ast.Module(tree.body[:-1], type_ignores=[]) + exec(ast.unparse(module), self.globals, self.locals) # type: ignore + module_end = ast.Module(tree.body[-1:], type_ignores=[]) + module_end_str = ast.unparse(module_end) # type: ignore + try: + return eval(module_end_str, self.globals, self.locals) + except Exception: + exec(module_end_str, self.globals, self.locals) + return "" + except Exception as e: + return str(e) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("PythonReplTool does not support async") diff --git a/langchain/tools/requests/__init__.py b/langchain/tools/requests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ec421f18dbdaa5b6f060ae11a6b92c21b9177377 --- /dev/null +++ b/langchain/tools/requests/__init__.py @@ -0,0 +1 @@ +"""Tools for making requests to an API endpoint.""" diff --git a/langchain/tools/requests/__pycache__/__init__.cpython-39.pyc b/langchain/tools/requests/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54cf466001d2e48666572a2e8c0caf1d9d4cf67e Binary files /dev/null and b/langchain/tools/requests/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/requests/__pycache__/tool.cpython-39.pyc b/langchain/tools/requests/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f73bed090089b7d5e8f2dc7ce33ac2b71bfa621e Binary files /dev/null and b/langchain/tools/requests/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/requests/tool.py b/langchain/tools/requests/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..7802f598f54d9b085981bd5ca6bc281cc7202eb8 --- /dev/null +++ b/langchain/tools/requests/tool.py @@ -0,0 +1,137 @@ +# flake8: noqa +"""Tools for making requests to an API endpoint.""" +import json +from typing import Any, Dict + +from pydantic import BaseModel + +from langchain.requests import RequestsWrapper +from langchain.tools.base import BaseTool + + +def _parse_input(text: str) -> Dict[str, Any]: + """Parse the json string into a dict.""" + return json.loads(text) + + +class BaseRequestsTool(BaseModel): + """Base class for requests tools.""" + + requests_wrapper: RequestsWrapper + + +class RequestsGetTool(BaseRequestsTool, BaseTool): + """Tool for making a GET request to an API endpoint.""" + + name = "requests_get" + description = "A portal to the internet. Use this when you need to get specific content from a website. Input should be a url (i.e. https://www.google.com). The output will be the text response of the GET request." 
+ + def _run(self, url: str) -> str: + """Run the tool.""" + return self.requests_wrapper.get(url) + + async def _arun(self, url: str) -> str: + """Run the tool asynchronously.""" + return await self.requests_wrapper.aget(url) + + +class RequestsPostTool(BaseRequestsTool, BaseTool): + """Tool for making a POST request to an API endpoint.""" + + name = "requests_post" + description = """Use this when you want to POST to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to POST to the url. + Be careful to always use double quotes for strings in the json string + The output will be the text response of the POST request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.post(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.apost(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsPatchTool(BaseRequestsTool, BaseTool): + """Tool for making a PATCH request to an API endpoint.""" + + name = "requests_patch" + description = """Use this when you want to PATCH to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to PATCH to the url. + Be careful to always use double quotes for strings in the json string + The output will be the text response of the PATCH request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.patch(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.apatch(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsPutTool(BaseRequestsTool, BaseTool): + """Tool for making a PUT request to an API endpoint.""" + + name = "requests_put" + description = """Use this when you want to PUT to a website. + Input should be a json string with two keys: "url" and "data". + The value of "url" should be a string, and the value of "data" should be a dictionary of + key-value pairs you want to PUT to the url. + Be careful to always use double quotes for strings in the json string. + The output will be the text response of the PUT request. + """ + + def _run(self, text: str) -> str: + """Run the tool.""" + try: + data = _parse_input(text) + return self.requests_wrapper.put(data["url"], data["data"]) + except Exception as e: + return repr(e) + + async def _arun(self, text: str) -> str: + """Run the tool asynchronously.""" + try: + data = _parse_input(text) + return await self.requests_wrapper.aput(data["url"], data["data"]) + except Exception as e: + return repr(e) + + +class RequestsDeleteTool(BaseRequestsTool, BaseTool): + """Tool for making a DELETE request to an API endpoint.""" + + name = "requests_delete" + description = "A portal to the internet. Use this when you need to make a DELETE request to a URL. Input should be a specific url, and the output will be the text response of the DELETE request." 
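A hedged sketch of the input format the POST/PATCH/PUT tools above expect — a JSON string with `"url"` and `"data"` keys — assuming `RequestsWrapper` can be constructed with defaults; the endpoint URL and payload are placeholders.

```python
import json

from langchain.requests import RequestsWrapper
from langchain.tools.requests.tool import RequestsPostTool

tool = RequestsPostTool(requests_wrapper=RequestsWrapper())

payload = json.dumps(
    {
        "url": "https://example.com/api/notes",  # hypothetical endpoint
        "data": {"title": "hello", "body": "from langchain"},
    }
)

# tool.run(payload) would POST the "data" dict to the "url" and return the response
# body as text; malformed JSON is caught in _run and returned as repr(error).
print(payload)
```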
+ + def _run(self, url: str) -> str: + """Run the tool.""" + return self.requests_wrapper.delete(url) + + async def _arun(self, url: str) -> str: + """Run the tool asynchronously.""" + return await self.requests_wrapper.adelete(url) diff --git a/langchain/tools/sql_database/__init__.py b/langchain/tools/sql_database/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..90fb3be1322f1bfab6e86d94f15fa4fac4639208 --- /dev/null +++ b/langchain/tools/sql_database/__init__.py @@ -0,0 +1 @@ +"""Tools for interacting with a SQL database.""" diff --git a/langchain/tools/sql_database/__pycache__/__init__.cpython-39.pyc b/langchain/tools/sql_database/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0cf7998740d935b29f46328275203b3e559b8679 Binary files /dev/null and b/langchain/tools/sql_database/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/sql_database/__pycache__/prompt.cpython-39.pyc b/langchain/tools/sql_database/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6b5f947ea0878e599280630fd0900839872936b Binary files /dev/null and b/langchain/tools/sql_database/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/tools/sql_database/__pycache__/tool.cpython-39.pyc b/langchain/tools/sql_database/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..652dd17fcd525c9c02ac6c8a9bdb19bb53c859a7 Binary files /dev/null and b/langchain/tools/sql_database/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/sql_database/prompt.py b/langchain/tools/sql_database/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..8d2b097358f2421d35abcc9caa8e7951f24c5e58 --- /dev/null +++ b/langchain/tools/sql_database/prompt.py @@ -0,0 +1,14 @@ +# flake8: noqa +QUERY_CHECKER = """ +{query} +Double check the {dialect} query above for common mistakes, including: +- Using NOT IN with NULL values +- Using UNION when UNION ALL should have been used +- Using BETWEEN for exclusive ranges +- Data type mismatch in predicates +- Properly quoting identifiers +- Using the correct number of arguments for functions +- Casting to the correct data type +- Using the proper columns for joins + +If there are any of the above mistakes, rewrite the query. 
If there are no mistakes, just reproduce the original query.""" diff --git a/langchain/tools/sql_database/tool.py b/langchain/tools/sql_database/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..1fe10499cdfd8c5d3a26b491e4cd6c9254accebc --- /dev/null +++ b/langchain/tools/sql_database/tool.py @@ -0,0 +1,124 @@ +# flake8: noqa +"""Tools for interacting with a SQL database.""" +from pydantic import BaseModel, Extra, Field, validator + +from langchain.chains.llm import LLMChain +from langchain.llms.openai import OpenAI +from langchain.prompts import PromptTemplate +from langchain.sql_database import SQLDatabase +from langchain.tools.base import BaseTool +from langchain.tools.sql_database.prompt import QUERY_CHECKER + + +class ClarifyTool(BaseTool): + """Tool for clarifying a query.""" + + name = "clarify" + description = "Input to this tool is the clarification question" \ + "send a message back to the customer to clarify their query" + return_direct = True + + def _run(self, clarification: str) -> str: + """Run the tool.""" + return clarification + + +class BaseSQLDatabaseTool(BaseModel): + """Base tool for interacting with a SQL database.""" + + db: SQLDatabase = Field(exclude=True) + + # Override BaseTool.Config to appease mypy + # See https://github.com/pydantic/pydantic/issues/4173 + class Config(BaseTool.Config): + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + extra = Extra.forbid + + +class QuerySQLDataBaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for querying a SQL database.""" + + name = "query_sql_db" + description = """ + Input to this tool is a detailed and correct SQL query, output is a result from the database. + If the query is not correct, an error message will be returned. + If an error is returned, rewrite the query, check the query, and try again. + """ + + def _run(self, query: str) -> str: + """Execute the query, return the results or an error message.""" + return self.db.run_no_throw(query) + + async def _arun(self, query: str) -> str: + raise NotImplementedError("QuerySqlDbTool does not support async") + + +class InfoSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting metadata about a SQL database.""" + + name = "schema_sql_db" + description = """ + Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. + Be sure that the tables actually exist by calling list_tables_sql_db first! + + Example Input: "table1, table2, table3" + """ + + def _run(self, table_names: str) -> str: + """Get the schema for tables in a comma-separated list.""" + return self.db.get_table_info_no_throw(table_names.split(", ")) + + async def _arun(self, table_name: str) -> str: + raise NotImplementedError("SchemaSqlDbTool does not support async") + + +class ListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): + """Tool for getting tables names.""" + + name = "list_tables_sql_db" + description = "Input is an empty string, output is a comma separated list of tables in the database." + + def _run(self, tool_input: str = "") -> str: + """Get the schema for a specific table.""" + return ", ".join(self.db.get_table_names()) + + async def _arun(self, tool_input: str = "") -> str: + raise NotImplementedError("ListTablesSqlDbTool does not support async") + + +class QueryCheckerTool(BaseSQLDatabaseTool, BaseTool): + """Use an LLM to check if a query is correct. 
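As a hedged sketch of wiring the schema/listing tools above to a live connection (the table and row are invented, over an in-memory SQLite database):

```python
from sqlalchemy import create_engine, text

from langchain.sql_database import SQLDatabase
from langchain.tools.sql_database.tool import InfoSQLDatabaseTool, ListSQLDatabaseTool

# Invented example database.
engine = create_engine("sqlite:///:memory:")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)"))
    conn.execute(text("INSERT INTO users VALUES (1, 'Ada')"))

db = SQLDatabase(engine)

print(ListSQLDatabaseTool(db=db).run(""))       # comma-separated table names: "users"
print(InfoSQLDatabaseTool(db=db).run("users"))  # CREATE TABLE statement plus sample rows
```

This is the same division of labour the agent follows: list the tables first, then fetch schema and sample rows only for the tables it intends to query.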
+ Adapted from https://www.patterns.app/blog/2023/01/18/crunchbot-sql-analyst-gpt/""" + + template: str = QUERY_CHECKER + llm_chain: LLMChain = Field( + default_factory=lambda: LLMChain( + llm=OpenAI(temperature=0), + prompt=PromptTemplate( + template=QUERY_CHECKER, input_variables=["query", "dialect"] + ), + ) + ) + name = "query_checker_sql_db" + description = """ + Use this tool to double check if your query is correct before executing it. + Always use this tool before executing a query with query_sql_db! + """ + + @validator("llm_chain") + def validate_llm_chain_input_variables(cls, llm_chain: LLMChain) -> LLMChain: + """Make sure the LLM chain has the correct input variables.""" + if llm_chain.prompt.input_variables != ["query", "dialect"]: + raise ValueError( + "LLM chain for QueryCheckerTool must have input variables ['query', 'dialect']" + ) + return llm_chain + + def _run(self, query: str) -> str: + """Use the LLM to check the query.""" + return self.llm_chain.predict(query=query, dialect=self.db.dialect) + + async def _arun(self, query: str) -> str: + return await self.llm_chain.apredict(query=query, dialect=self.db.dialect) diff --git a/langchain/tools/vectorstore/__init__.py b/langchain/tools/vectorstore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2bb638101959c35f933e0cf8601a448205a5d43a --- /dev/null +++ b/langchain/tools/vectorstore/__init__.py @@ -0,0 +1 @@ +"""Simple tool wrapper around VectorDBQA chain.""" diff --git a/langchain/tools/vectorstore/__pycache__/__init__.cpython-39.pyc b/langchain/tools/vectorstore/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d240a38e9d9bbadeb78ee024b82c1e55f4fb5978 Binary files /dev/null and b/langchain/tools/vectorstore/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/vectorstore/__pycache__/tool.cpython-39.pyc b/langchain/tools/vectorstore/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3da3aa1bedd49da552e600f8305f5e2c9a641f9e Binary files /dev/null and b/langchain/tools/vectorstore/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/vectorstore/tool.py b/langchain/tools/vectorstore/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..adc35e7f77be78e913f58c1fd941e3ea3d5f45b8 --- /dev/null +++ b/langchain/tools/vectorstore/tool.py @@ -0,0 +1,81 @@ +"""Tools for interacting with vectorstores.""" + +import json +from typing import Any, Dict + +from pydantic import BaseModel, Field + +from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain +from langchain.chains.retrieval_qa.base import VectorDBQA +from langchain.llms.base import BaseLLM +from langchain.llms.openai import OpenAI +from langchain.tools.base import BaseTool +from langchain.vectorstores.base import VectorStore + + +class BaseVectorStoreTool(BaseModel): + """Base class for tools that use a VectorStore.""" + + vectorstore: VectorStore = Field(exclude=True) + llm: BaseLLM = Field(default_factory=lambda: OpenAI(temperature=0)) + + class Config(BaseTool.Config): + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + +def _create_description_from_template(values: Dict[str, Any]) -> Dict[str, Any]: + values["description"] = values["template"].format(name=values["name"]) + return values + + +class VectorStoreQATool(BaseVectorStoreTool, BaseTool): + """Tool for the VectorDBQA chain. 
To be initialized with name and chain.""" + + @staticmethod + def get_description(name: str, description: str) -> str: + template: str = ( + "Useful for when you need to answer questions about {name}. " + "Whenever you need information about {description} " + "you should ALWAYS use this. " + "Input should be a fully formed question." + ) + return template.format(name=name, description=description) + + def _run(self, query: str) -> str: + """Use the tool.""" + chain = VectorDBQA.from_chain_type(self.llm, vectorstore=self.vectorstore) + return chain.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("VectorDBQATool does not support async") + + +class VectorStoreQAWithSourcesTool(BaseVectorStoreTool, BaseTool): + """Tool for the VectorDBQAWithSources chain.""" + + @staticmethod + def get_description(name: str, description: str) -> str: + template: str = ( + "Useful for when you need to answer questions about {name} and the sources " + "used to construct the answer. " + "Whenever you need information about {description} " + "you should ALWAYS use this. " + " Input should be a fully formed question. " + "Output is a json serialized dictionary with keys `answer` and `sources`. " + "Only use this tool if the user explicitly asks for sources." + ) + return template.format(name=name, description=description) + + def _run(self, query: str) -> str: + """Use the tool.""" + chain = VectorDBQAWithSourcesChain.from_chain_type( + self.llm, vectorstore=self.vectorstore + ) + return json.dumps(chain({chain.question_key: query}, return_only_outputs=True)) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + raise NotImplementedError("VectorDBQATool does not support async") diff --git a/langchain/tools/wikipedia/__init__.py b/langchain/tools/wikipedia/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0b3edd083874aea350e44514d24c8f692307daef --- /dev/null +++ b/langchain/tools/wikipedia/__init__.py @@ -0,0 +1 @@ +"""Wikipedia API toolkit.""" diff --git a/langchain/tools/wikipedia/__pycache__/__init__.cpython-39.pyc b/langchain/tools/wikipedia/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36f0d17c638944b8772c2dee6a6c37957587062d Binary files /dev/null and b/langchain/tools/wikipedia/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/wikipedia/__pycache__/tool.cpython-39.pyc b/langchain/tools/wikipedia/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d33cfbe3aa2b6db360aae979321aae11c38b6ba Binary files /dev/null and b/langchain/tools/wikipedia/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/wikipedia/tool.py b/langchain/tools/wikipedia/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..5bede75b21689f80ea9e5e0e8de5b75db4992837 --- /dev/null +++ b/langchain/tools/wikipedia/tool.py @@ -0,0 +1,25 @@ +"""Tool for the Wikipedia API.""" + +from langchain.tools.base import BaseTool +from langchain.utilities.wikipedia import WikipediaAPIWrapper + + +class WikipediaQueryRun(BaseTool): + """Tool that adds the capability to search using the Wikipedia API.""" + + name = "Wikipedia" + description = ( + "A wrapper around Wikipedia. " + "Useful for when you need to answer general questions about " + "people, places, companies, historical events, or other subjects. " + "Input should be a search query." 
+ ) + api_wrapper: WikipediaAPIWrapper + + def _run(self, query: str) -> str: + """Use the Wikipedia tool.""" + return self.api_wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the Wikipedia tool asynchronously.""" + raise NotImplementedError("WikipediaQueryRun does not support async") diff --git a/langchain/tools/wolfram_alpha/__init__.py b/langchain/tools/wolfram_alpha/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e6870ac88c5e5ef7202b4b4df980c0743d2284b6 --- /dev/null +++ b/langchain/tools/wolfram_alpha/__init__.py @@ -0,0 +1 @@ +"""Wolfram Alpha API toolkit.""" diff --git a/langchain/tools/wolfram_alpha/__pycache__/__init__.cpython-39.pyc b/langchain/tools/wolfram_alpha/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91283fc3f479c7590a7d07728298c9509e7cc18f Binary files /dev/null and b/langchain/tools/wolfram_alpha/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/wolfram_alpha/__pycache__/tool.cpython-39.pyc b/langchain/tools/wolfram_alpha/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cab1df27364456a6e4b30da616d3173a6d555a44 Binary files /dev/null and b/langchain/tools/wolfram_alpha/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/wolfram_alpha/tool.py b/langchain/tools/wolfram_alpha/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..ecac7b8f463946446b6a791c3568e8f354f52a92 --- /dev/null +++ b/langchain/tools/wolfram_alpha/tool.py @@ -0,0 +1,25 @@ +"""Tool for the Wolfram Alpha API.""" + +from langchain.tools.base import BaseTool +from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper + + +class WolframAlphaQueryRun(BaseTool): + """Tool that adds the capability to query using the Wolfram Alpha SDK.""" + + name = "Wolfram Alpha" + description = ( + "A wrapper around Wolfram Alpha. " + "Useful for when you need to answer questions about Math, " + "Science, Technology, Culture, Society and Everyday Life. " + "Input should be a search query." 
+ ) + api_wrapper: WolframAlphaAPIWrapper + + def _run(self, query: str) -> str: + """Use the WolframAlpha tool.""" + return self.api_wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the WolframAlpha tool asynchronously.""" + raise NotImplementedError("WolframAlphaQueryRun does not support async") diff --git a/langchain/tools/zapier/__init__.py b/langchain/tools/zapier/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ef6b8a2aa64e38b3c7fad860bc406ecbaec1bb11 --- /dev/null +++ b/langchain/tools/zapier/__init__.py @@ -0,0 +1 @@ +"""Zapier Tool.""" diff --git a/langchain/tools/zapier/__pycache__/__init__.cpython-39.pyc b/langchain/tools/zapier/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca4717fab5da12de307169068e9eed41b59c7190 Binary files /dev/null and b/langchain/tools/zapier/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/tools/zapier/__pycache__/prompt.cpython-39.pyc b/langchain/tools/zapier/__pycache__/prompt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..637d93ba1f6474fa32c280874b8e04fa9ed8548c Binary files /dev/null and b/langchain/tools/zapier/__pycache__/prompt.cpython-39.pyc differ diff --git a/langchain/tools/zapier/__pycache__/tool.cpython-39.pyc b/langchain/tools/zapier/__pycache__/tool.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5557f99a34293a1fc93f8a2d73af5c380c5bdb2 Binary files /dev/null and b/langchain/tools/zapier/__pycache__/tool.cpython-39.pyc differ diff --git a/langchain/tools/zapier/prompt.py b/langchain/tools/zapier/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..063e3952ef2aaf1122b63c17e39b06c8c4dc3f06 --- /dev/null +++ b/langchain/tools/zapier/prompt.py @@ -0,0 +1,15 @@ +# flake8: noqa +BASE_ZAPIER_TOOL_PROMPT = ( + "A wrapper around Zapier NLA actions. " + "The input to this tool is a natural language instruction, " + 'for example "get the latest email from my bank" or ' + '"send a slack message to the #general channel". ' + "Each tool will have params associated with it that are specified as a list. You MUST take into account the params when creating the instruction. " + "For example, if the params are ['Message_Text', 'Channel'], your instruction should be something like 'send a slack message to the #general channel with the text hello world'. " + "Another example: if the params are ['Calendar', 'Search_Term'], your instruction should be something like 'find the meeting in my personal calendar at 3pm'. " + "Do not make up params, they will be explicitly specified in the tool description. " + "If you do not have enough information to fill in the params, just say 'not enough information provided in the instruction, missing '. " + "If you get a none or null response, STOP EXECUTION, do not try to another tool!" + "This tool specifically used for: {zapier_description}, " + "and has params: {params}" +) diff --git a/langchain/tools/zapier/tool.py b/langchain/tools/zapier/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..275c90020cf06cce52bd4fd8db61ee47603815e9 --- /dev/null +++ b/langchain/tools/zapier/tool.py @@ -0,0 +1,159 @@ +"""## Zapier Natural Language Actions API +\ +Full docs here: https://nla.zapier.com/api/v1/docs + +**Zapier Natural Language Actions** gives you access to the 5k+ apps, 20k+ actions +on Zapier's platform through a natural language API interface. 
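Before the NLA walkthrough continues, here is a hedged illustration of how each action's tool description is assembled from `BASE_ZAPIER_TOOL_PROMPT` (defined in `langchain/tools/zapier/prompt.py`); the action name and params below are invented.

```python
from langchain.tools.zapier.prompt import BASE_ZAPIER_TOOL_PROMPT

# Invented action metadata, mirroring what ZapierNLARunAction.set_name_description does.
description = BASE_ZAPIER_TOOL_PROMPT.format(
    zapier_description="Slack: Send Channel Message",
    params=str(["Channel", "Message_Text"]),
)
print(description)
```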
+ +NLA supports apps like Gmail, Salesforce, Trello, Slack, Asana, HubSpot, Google Sheets, +Microsoft Teams, and thousands more apps: https://zapier.com/apps + +Zapier NLA handles ALL the underlying API auth and translation from +natural language --> underlying API call --> return simplified output for LLMs +The key idea is you, or your users, expose a set of actions via an oauth-like setup +window, which you can then query and execute via a REST API. + +NLA offers both API Key and OAuth for signing NLA API requests. + +1. Server-side (API Key): for quickly getting started, testing, and production scenarios + where LangChain will only use actions exposed in the developer's Zapier account + (and will use the developer's connected accounts on Zapier.com) + +2. User-facing (Oauth): for production scenarios where you are deploying an end-user + facing application and LangChain needs access to end-user's exposed actions and + connected accounts on Zapier.com + +This quick start will focus on the server-side use case for brevity. +Review [full docs](https://nla.zapier.com/api/v1/docs) or reach out to +nla@zapier.com for user-facing oauth developer support. + +Typically you'd use SequentialChain, here's a basic example: + + 1. Use NLA to find an email in Gmail + 2. Use LLMChain to generate a draft reply to (1) + 3. Use NLA to send the draft reply (2) to someone in Slack via direct mesage + +In code, below: + +```python + +import os + +# get from https://platform.openai.com/ +os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "") + +# get from https://nla.zapier.com/demo/provider/debug +# (under User Information, after logging in): +os.environ["ZAPIER_NLA_API_KEY"] = os.environ.get("ZAPIER_NLA_API_KEY", "") + +from langchain.llms import OpenAI +from langchain.agents import initialize_agent +from langchain.agents.agent_toolkits import ZapierToolkit +from langchain.utilities.zapier import ZapierNLAWrapper + +## step 0. expose gmail 'find email' and slack 'send channel message' actions + +# first go here, log in, expose (enable) the two actions: +# https://nla.zapier.com/demo/start +# -- for this example, can leave all fields "Have AI guess" +# in an oauth scenario, you'd get your own id (instead of 'demo') +# which you route your users through first + +llm = OpenAI(temperature=0) +zapier = ZapierNLAWrapper() +toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier) +agent = initialize_agent( + toolkit.get_tools(), + llm, + agent="zero-shot-react-description", + verbose=True +) + +agent.run(("Summarize the last email I received regarding Silicon Valley Bank. " + "Send the summary to the #test-zapier channel in slack.")) +``` + +""" +from typing import Any, Dict, Optional + +from pydantic import Field, root_validator + +from langchain.tools.base import BaseTool +from langchain.tools.zapier.prompt import BASE_ZAPIER_TOOL_PROMPT +from langchain.utilities.zapier import ZapierNLAWrapper + + +class ZapierNLARunAction(BaseTool): + """ + Args: + action_id: a specific action ID (from list actions) of the action to execute + (the set api_key must be associated with the action owner) + instructions: a natural language instruction string for using the action + (eg. "get the latest email from Mike Knoop" for "Gmail: find email" action) + params: a dict, optional. 
Any params provided will *override* AI guesses + from `instructions` (see "understanding the AI guessing flow" here: + https://nla.zapier.com/api/v1/docs) + """ + + api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) + action_id: str + params: Optional[dict] = None + zapier_description: str + params_schema: Dict[str, str] = Field(default_factory=dict) + name = "" + description = "" + + @root_validator + def set_name_description(cls, values: Dict[str, Any]) -> Dict[str, Any]: + zapier_description = values["zapier_description"] + params_schema = values["params_schema"] + if "instructions" in params_schema: + del params_schema["instructions"] + values["name"] = zapier_description + values["description"] = BASE_ZAPIER_TOOL_PROMPT.format( + zapier_description=zapier_description, + params=str(list(params_schema.keys())), + ) + return values + + def _run(self, instructions: str) -> str: + """Use the Zapier NLA tool to execute an action with the given instructions.""" + return self.api_wrapper.run_as_str(self.action_id, instructions, self.params) + + async def _arun(self, _: str) -> str: + """Use the Zapier NLA tool to execute an action asynchronously.""" + raise NotImplementedError("ZapierNLARunAction does not support async") + + +ZapierNLARunAction.__doc__ = ( + ZapierNLAWrapper.run.__doc__ + ZapierNLARunAction.__doc__ # type: ignore +) + + +# other useful actions + + +class ZapierNLAListActions(BaseTool): + """ + Args: + None + """ + + name = "Zapier NLA: List Actions" + description = BASE_ZAPIER_TOOL_PROMPT + ( + "This tool returns a list of the user's exposed actions." + ) + api_wrapper: ZapierNLAWrapper = Field(default_factory=ZapierNLAWrapper) + + def _run(self, _: str) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + return self.api_wrapper.list_as_str() + + async def _arun(self, _: str) -> str: + """Use the Zapier NLA tool to return a list of all exposed user actions.""" + raise NotImplementedError("ZapierNLAListActions does not support async") + + +ZapierNLAListActions.__doc__ = ( + ZapierNLAWrapper.list.__doc__ + ZapierNLAListActions.__doc__ # type: ignore +) diff --git a/langchain/utilities/__init__.py b/langchain/utilities/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..058ab5f7e9dc003bd15b4a867b3dd5386e8b8979 --- /dev/null +++ b/langchain/utilities/__init__.py @@ -0,0 +1,24 @@ +"""General utilities.""" +from langchain.python import PythonREPL +from langchain.requests import RequestsWrapper +from langchain.utilities.bash import BashProcess +from langchain.utilities.bing_search import BingSearchAPIWrapper +from langchain.utilities.google_search import GoogleSearchAPIWrapper +from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.searx_search import SearxSearchWrapper +from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper +from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper + +__all__ = [ + "BashProcess", + "RequestsWrapper", + "PythonREPL", + "GoogleSearchAPIWrapper", + "GoogleSerperAPIWrapper", + "WolframAlphaAPIWrapper", + "SerpAPIWrapper", + "SearxSearchWrapper", + "BingSearchAPIWrapper", + "WikipediaAPIWrapper", +] diff --git a/langchain/utilities/__pycache__/__init__.cpython-39.pyc b/langchain/utilities/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0fbc1931c4610cfc5f446921855c910eaf1369af Binary files
/dev/null and b/langchain/utilities/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/bash.cpython-39.pyc b/langchain/utilities/__pycache__/bash.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45a61f16c2b36e2815abc5491c5ef11317c2708e Binary files /dev/null and b/langchain/utilities/__pycache__/bash.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/bing_search.cpython-39.pyc b/langchain/utilities/__pycache__/bing_search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8999b5af15e7b94316f142fc35bc374bff554c92 Binary files /dev/null and b/langchain/utilities/__pycache__/bing_search.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/google_search.cpython-39.pyc b/langchain/utilities/__pycache__/google_search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0352f8da21f62d39f0d4409ae4168d8ede4bd76 Binary files /dev/null and b/langchain/utilities/__pycache__/google_search.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/google_serper.cpython-39.pyc b/langchain/utilities/__pycache__/google_serper.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..28f59d5fa30bda6d62d95c46513a9f7c2e978eeb Binary files /dev/null and b/langchain/utilities/__pycache__/google_serper.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/loading.cpython-39.pyc b/langchain/utilities/__pycache__/loading.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1343c591642dd45e5b33cb292e7a2bc7972ba0c1 Binary files /dev/null and b/langchain/utilities/__pycache__/loading.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/searx_search.cpython-39.pyc b/langchain/utilities/__pycache__/searx_search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f80e15d287d086a9172b8589651d2ebaa647375 Binary files /dev/null and b/langchain/utilities/__pycache__/searx_search.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/serpapi.cpython-39.pyc b/langchain/utilities/__pycache__/serpapi.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6f4b4fcc24fab5977831ddf6861e0679feb1516 Binary files /dev/null and b/langchain/utilities/__pycache__/serpapi.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/wikipedia.cpython-39.pyc b/langchain/utilities/__pycache__/wikipedia.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ece350b80a7ef8d9801d7d8e2e963abb160a6b7 Binary files /dev/null and b/langchain/utilities/__pycache__/wikipedia.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/wolfram_alpha.cpython-39.pyc b/langchain/utilities/__pycache__/wolfram_alpha.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c4e6fd52e70ab4da4a5d40932faa8d8c42c8567 Binary files /dev/null and b/langchain/utilities/__pycache__/wolfram_alpha.cpython-39.pyc differ diff --git a/langchain/utilities/__pycache__/zapier.cpython-39.pyc b/langchain/utilities/__pycache__/zapier.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..228e09ec4a5e1486ffe02fcffe21d4228ba934eb Binary files /dev/null and b/langchain/utilities/__pycache__/zapier.cpython-39.pyc differ diff --git a/langchain/utilities/bash.py b/langchain/utilities/bash.py new file mode 100644 index 
0000000000000000000000000000000000000000..d4bcf73d9d4fc55be58c468b8e779469ad7f82ef --- /dev/null +++ b/langchain/utilities/bash.py @@ -0,0 +1,33 @@ +"""Wrapper around subprocess to run commands.""" +import subprocess +from typing import List, Union + + +class BashProcess: + """Executes bash commands and returns the output.""" + + def __init__(self, strip_newlines: bool = False, return_err_output: bool = False): + """Initialize with stripping newlines.""" + self.strip_newlines = strip_newlines + self.return_err_output = return_err_output + + def run(self, commands: Union[str, List[str]]) -> str: + """Run commands and return final output.""" + if isinstance(commands, str): + commands = [commands] + commands = ";".join(commands) + try: + output = subprocess.run( + commands, + shell=True, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ).stdout.decode() + except subprocess.CalledProcessError as error: + if self.return_err_output: + return error.stdout.decode() + return str(error) + if self.strip_newlines: + output = output.strip() + return output diff --git a/langchain/utilities/bing_search.py b/langchain/utilities/bing_search.py new file mode 100644 index 0000000000000000000000000000000000000000..6dc2fe4073edb7ac6ec317dd02fb51d3cba15f05 --- /dev/null +++ b/langchain/utilities/bing_search.py @@ -0,0 +1,100 @@ +"""Util that calls Bing Search. + +In order to set this up, follow instructions at: +https://levelup.gitconnected.com/api-tutorial-how-to-use-bing-web-search-api-in-python-4165d5592a7e +""" +from typing import Dict, List + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.utils import get_from_dict_or_env + + +class BingSearchAPIWrapper(BaseModel): + """Wrapper for Bing Search API. + + In order to set this up, follow instructions at: + https://levelup.gitconnected.com/api-tutorial-how-to-use-bing-web-search-api-in-python-4165d5592a7e + """ + + bing_subscription_key: str + bing_search_url: str + k: int = 10 + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _bing_search_results(self, search_term: str, count: int) -> List[dict]: + headers = {"Ocp-Apim-Subscription-Key": self.bing_subscription_key} + params = { + "q": search_term, + "count": count, + "textDecorations": True, + "textFormat": "HTML", + } + response = requests.get( + self.bing_search_url, headers=headers, params=params # type: ignore + ) + response.raise_for_status() + search_results = response.json() + return search_results["webPages"]["value"] + + @root_validator(pre=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and endpoint exists in environment.""" + bing_subscription_key = get_from_dict_or_env( + values, "bing_subscription_key", "BING_SUBSCRIPTION_KEY" + ) + values["bing_subscription_key"] = bing_subscription_key + + bing_search_url = get_from_dict_or_env( + values, + "bing_search_url", + "BING_SEARCH_URL", + # default="https://api.bing.microsoft.com/v7.0/search", + ) + + values["bing_search_url"] = bing_search_url + + return values + + def run(self, query: str) -> str: + """Run query through BingSearch and parse result.""" + snippets = [] + results = self._bing_search_results(query, count=self.k) + if len(results) == 0: + return "No good Bing Search Result was found" + for result in results: + snippets.append(result["snippet"]) + + return " ".join(snippets) + + def results(self, query: str, num_results: int) -> List[Dict]: + """Run query through BingSearch and return 
metadata. + + Args: + query: The query to search for. + num_results: The number of results to return. + + Returns: + A list of dictionaries with the following keys: + snippet - The description of the result. + title - The title of the result. + link - The link to the result. + """ + metadata_results = [] + results = self._bing_search_results(query, count=num_results) + if len(results) == 0: + return [{"Result": "No good Bing Search Result was found"}] + for result in results: + metadata_result = { + "snippet": result["snippet"], + "title": result["name"], + "link": result["url"], + } + metadata_results.append(metadata_result) + + return metadata_results diff --git a/langchain/utilities/google_search.py b/langchain/utilities/google_search.py new file mode 100644 index 0000000000000000000000000000000000000000..9b1ec5b6fd86cfc157ef774e8bf975044ac9ea34 --- /dev/null +++ b/langchain/utilities/google_search.py @@ -0,0 +1,129 @@ +"""Util that calls Google Search.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.utils import get_from_dict_or_env + + +class GoogleSearchAPIWrapper(BaseModel): + """Wrapper for Google Search API. + + Adapted from: Instructions adapted from https://stackoverflow.com/questions/ + 37083058/ + programmatically-searching-google-in-python-using-custom-search + + TODO: DOCS for using it + 1. Install google-api-python-client + - If you don't already have a Google account, sign up. + - If you have never created a Google APIs Console project, + read the Managing Projects page and create a project in the Google API Console. + - Install the library using pip install google-api-python-client + The current version of the library is 2.70.0 at this time + + 2. To create an API key: + - Navigate to the APIs & Services→Credentials panel in Cloud Console. + - Select Create credentials, then select API key from the drop-down menu. + - The API key created dialog box displays your newly created key. + - You now have an API_KEY + + 3. Setup Custom Search Engine so you can search the entire web + - Create a custom search engine in this link. + - In Sites to search, add any valid URL (i.e. www.stackoverflow.com). + - That’s all you have to fill up, the rest doesn’t matter. + In the left-side menu, click Edit search engine → {your search engine name} + → Setup Set Search the entire web to ON. Remove the URL you added from + the list of Sites to search. + - Under Search engine ID you’ll find the search-engine-ID. + + 4. Enable the Custom Search API + - Navigate to the APIs & Services→Dashboard panel in Cloud Console. + - Click Enable APIs and Services. + - Search for Custom Search API and click on it. + - Click Enable. 
+ URL for it: https://console.cloud.google.com/apis/library/customsearch.googleapis + .com + """ + + search_engine: Any #: :meta private: + google_api_key: Optional[str] = None + google_cse_id: Optional[str] = None + k: int = 10 + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]: + res = ( + self.search_engine.cse() + .list(q=search_term, cx=self.google_cse_id, **kwargs) + .execute() + ) + return res.get("items", []) + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + google_api_key = get_from_dict_or_env( + values, "google_api_key", "GOOGLE_API_KEY" + ) + values["google_api_key"] = google_api_key + + google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID") + values["google_cse_id"] = google_cse_id + + try: + from googleapiclient.discovery import build + + except ImportError: + raise ImportError( + "google-api-python-client is not installed. " + "Please install it with `pip install google-api-python-client`" + ) + + service = build("customsearch", "v1", developerKey=google_api_key) + values["search_engine"] = service + + return values + + def run(self, query: str) -> str: + """Run query through GoogleSearch and parse result.""" + snippets = [] + results = self._google_search_results(query, num=self.k) + if len(results) == 0: + return "No good Google Search Result was found" + for result in results: + if "snippet" in result: + snippets.append(result["snippet"]) + + return " ".join(snippets) + + def results(self, query: str, num_results: int) -> List[Dict]: + """Run query through GoogleSearch and return metadata. + + Args: + query: The query to search for. + num_results: The number of results to return. + + Returns: + A list of dictionaries with the following keys: + snippet - The description of the result. + title - The title of the result. + link - The link to the result. + """ + metadata_results = [] + results = self._google_search_results(query, num=num_results) + if len(results) == 0: + return [{"Result": "No good Google Search Result was found"}] + for result in results: + metadata_result = { + "title": result["title"], + "link": result["link"], + } + if "snippet" in result: + metadata_result["snippet"] = result["snippet"] + metadata_results.append(metadata_result) + + return metadata_results diff --git a/langchain/utilities/google_serper.py b/langchain/utilities/google_serper.py new file mode 100644 index 0000000000000000000000000000000000000000..9d8126a3fb6dc881dacd420994f93038ed7d00e0 --- /dev/null +++ b/langchain/utilities/google_serper.py @@ -0,0 +1,94 @@ +"""Util that calls Google Search using the Serper.dev API.""" +from typing import Dict, Optional + +import requests +from pydantic.class_validators import root_validator +from pydantic.main import BaseModel + +from langchain.utils import get_from_dict_or_env + + +class GoogleSerperAPIWrapper(BaseModel): + """Wrapper around the Serper.dev Google Search API. + + You can create a free API key at https://serper.dev. + + To use, you should have the environment variable ``SERPER_API_KEY`` + set with your API key, or pass `serper_api_key` as a named parameter + to the constructor. + + Example: + .. 
code-block:: python + + from langchain import GoogleSerperAPIWrapper + google_serper = GoogleSerperAPIWrapper() + """ + + k: int = 10 + gl: str = "us" + hl: str = "en" + serper_api_key: Optional[str] = None + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + serper_api_key = get_from_dict_or_env( + values, "serper_api_key", "SERPER_API_KEY" + ) + values["serper_api_key"] = serper_api_key + + return values + + def run(self, query: str) -> str: + """Run query through GoogleSearch and parse result.""" + results = self._google_serper_search_results(query, gl=self.gl, hl=self.hl) + + return self._parse_results(results) + + def _parse_results(self, results: dict) -> str: + snippets = [] + + if results.get("answerBox"): + answer_box = results.get("answerBox", {}) + if answer_box.get("answer"): + return answer_box.get("answer") + elif answer_box.get("snippet"): + return answer_box.get("snippet").replace("\n", " ") + elif answer_box.get("snippetHighlighted"): + return ", ".join(answer_box.get("snippetHighlighted")) + + if results.get("knowledgeGraph"): + kg = results.get("knowledgeGraph", {}) + title = kg.get("title") + entity_type = kg.get("type") + if entity_type: + snippets.append(f"{title}: {entity_type}.") + description = kg.get("description") + if description: + snippets.append(description) + for attribute, value in kg.get("attributes", {}).items(): + snippets.append(f"{title} {attribute}: {value}.") + + for result in results["organic"][: self.k]: + if "snippet" in result: + snippets.append(result["snippet"]) + for attribute, value in result.get("attributes", {}).items(): + snippets.append(f"{attribute}: {value}.") + + if len(snippets) == 0: + return "No good Google Search Result was found" + + return " ".join(snippets) + + def _google_serper_search_results(self, search_term: str, gl: str, hl: str) -> dict: + headers = { + "X-API-KEY": self.serper_api_key or "", + "Content-Type": "application/json", + } + params = {"q": search_term, "gl": gl, "hl": hl} + response = requests.post( + "https://google.serper.dev/search", headers=headers, params=params + ) + response.raise_for_status() + search_results = response.json() + return search_results diff --git a/langchain/utilities/loading.py b/langchain/utilities/loading.py new file mode 100644 index 0000000000000000000000000000000000000000..45569e4a1dd85ef34133cf67701608ae28f1587a --- /dev/null +++ b/langchain/utilities/loading.py @@ -0,0 +1,49 @@ +"""Utilities for loading configurations from langchain-hub.""" + +import os +import re +import tempfile +from pathlib import Path +from typing import Any, Callable, Optional, Set, TypeVar, Union +from urllib.parse import urljoin + +import requests + +DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master") +URL_BASE = os.environ.get( + "LANGCHAIN_HUB_URL_BASE", + "https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/", +) +HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)") + + +T = TypeVar("T") + + +def try_load_from_hub( + path: Union[str, Path], + loader: Callable[[str], T], + valid_prefix: str, + valid_suffixes: Set[str], + **kwargs: Any, +) -> Optional[T]: + """Load configuration from hub.
Returns None if path is not a hub path.""" + if not isinstance(path, str) or not (match := HUB_PATH_RE.match(path)): + return None + ref, remote_path_str = match.groups() + ref = ref[1:] if ref else DEFAULT_REF + remote_path = Path(remote_path_str) + if remote_path.parts[0] != valid_prefix: + return None + if remote_path.suffix[1:] not in valid_suffixes: + raise ValueError("Unsupported file type.") + + full_url = urljoin(URL_BASE.format(ref=ref), str(remote_path)) + r = requests.get(full_url, timeout=5) + if r.status_code != 200: + raise ValueError(f"Could not find file at {full_url}") + with tempfile.TemporaryDirectory() as tmpdirname: + file = Path(tmpdirname) / remote_path.name + with open(file, "wb") as f: + f.write(r.content) + return loader(str(file), **kwargs) diff --git a/langchain/utilities/searx_search.py b/langchain/utilities/searx_search.py new file mode 100644 index 0000000000000000000000000000000000000000..b171a0d75bed93f57f1510ce5acfc7c0abebb6e5 --- /dev/null +++ b/langchain/utilities/searx_search.py @@ -0,0 +1,385 @@ +"""Utility for using SearxNG meta search API. + +SearxNG is a privacy-friendly free metasearch engine that aggregates results from +multiple search engines and databases and supports the OpenSearch specification. + +More details on the installation instructions `here <../../ecosystem/searx.html>`_. + +For the search API refer to https://docs.searxng.org/dev/search_api.html + +Quick Start +----------- + + +In order to use this tool you need to provide the searx host. This can be done +by passing the named parameter :attr:`searx_host` +or exporting the environment variable SEARX_HOST. +Note: this is the only required parameter. + +Then create a searx search instance like this: + + .. code-block:: python + + from langchain.utilities import SearxSearchWrapper + + # when the host starts with `http` SSL is disabled and the connection + # is assumed to be on a private network + searx_host='http://self.hosted' + + search = SearxSearchWrapper(searx_host=searx_host) + + +You can now use the ``search`` instance to query the searx API. + +Searching +--------- + +Use the :meth:`run()` and +:meth:`results()` methods to query the searx API. +Other methods are available for convenience. + +:class:`SearxResults` is a convenience wrapper around the raw json result. + +Example usage of the ``run`` method to make a search: + + .. code-block:: python + + s.run(query="what is the best search engine?") + +Engine Parameters +----------------- + +You can pass any accepted searx search API parameters to the +:py:class:`SearxSearchWrapper` instance. + +In the following example we are using the +:attr:`engines` and the ``language`` parameters: + + .. code-block:: python + + # assuming the searx host is set as above or exported as an env variable + s = SearxSearchWrapper(engines=['google', 'bing'], + language='es') + +Search Tips +----------- + +Searx offers a special search syntax +that can also be used instead of passing engine parameters. + +For example the following query: + + .. code-block:: python + + s = SearxSearchWrapper("langchain library", engines=['github']) + + # can also be written as: + s = SearxSearchWrapper("langchain library !github") + # or even: + s = SearxSearchWrapper("langchain library !gh") + + +In some situations you might want to pass an extra string to the search query. +For example when the `run()` method is called by an agent.
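(Aside on `try_load_from_hub` from `loading.py` above: a minimal sketch of a call, assuming the hub path below exists in langchain-hub and using a trivial text loader; illustrative only, and it fetches over the network.)

```python
from pathlib import Path

from langchain.utilities.loading import try_load_from_hub

# Illustrative: fetch a prompt config from langchain-hub and return its raw text.
# The "lc://" path and the trivial loader are assumptions for this sketch.
config_text = try_load_from_hub(
    "lc://prompts/hello-world/prompt.yaml",
    loader=lambda local_path: Path(local_path).read_text(),
    valid_prefix="prompts",
    valid_suffixes={"yaml", "json"},
)
```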
The search suffix can + also be used as a way to pass extra parameters to searx or the underlying search + engines. + + .. code-block:: python + + # select the github engine and pass the search suffix + s = SearxSearchWrapper("langchain library", query_suffix="!gh") + + + s = SearxSearchWrapper("langchain library") + # select github using the conventional google search syntax + s.run("large language models", query_suffix="site:github.com") + + +*NOTE*: A search suffix can be defined on both the instance and the method level. +The resulting query will be the concatenation of the two with the former taking +precedence. + + +See the SearxNG Configured Engines and SearxNG Search Syntax documentation +for more details. + +Notes +----- +This wrapper is based on the SearxNG fork https://github.com/searxng/searxng which is +better maintained than the original Searx project and offers more features. + +Public SearxNG instances often use a rate limiter for API usage, so you might want to +use a self-hosted instance and disable the rate limiter. + +If you are self-hosting an instance you can customize the rate limiter for your +own network as described in the SearxNG documentation. + + +For a list of public SearxNG instances see https://searx.space/ +""" + +import json +from typing import Any, Dict, List, Optional + +import requests +from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator + +from langchain.utils import get_from_dict_or_env + + +def _get_default_params() -> dict: + return {"language": "en", "format": "json"} + + +class SearxResults(dict): + """Dict like wrapper around search api results.""" + + _data = "" + + def __init__(self, data: str): + """Take a raw result from Searx and make it into a dict like object.""" + json_data = json.loads(data) + super().__init__(json_data) + self.__dict__ = self + + def __str__(self) -> str: + """Text representation of searx result.""" + return self._data + + @property + def results(self) -> Any: + """Silence mypy for accessing this field. + + :meta private: + """ + return self.get("results") + + @property + def answers(self) -> Any: + """Helper accessor on the json result.""" + return self.get("answers") + + +class SearxSearchWrapper(BaseModel): + """Wrapper for Searx API. + + To use you need to provide the searx host by passing the named parameter + ``searx_host`` or exporting the environment variable ``SEARX_HOST``. + + In some situations you might want to disable SSL verification, for example + if you are running searx locally. You can do this by passing the named parameter + ``unsecure``. You can also pass the host url scheme as ``http`` to disable SSL. + + Example: + .. code-block:: python + + from langchain.utilities import SearxSearchWrapper + searx = SearxSearchWrapper(searx_host="http://localhost:8888") + + Example with SSL disabled: + ..
code-block:: python + + from langchain.utilities import SearxSearchWrapper + # note the unsecure parameter is not needed if you pass the url scheme as + # http + searx = SearxSearchWrapper(searx_host="http://localhost:8888", + unsecure=True) + + + """ + + _result: SearxResults = PrivateAttr() + searx_host: str = "" + unsecure: bool = False + params: dict = Field(default_factory=_get_default_params) + headers: Optional[dict] = None + engines: Optional[List[str]] = [] + query_suffix: Optional[str] = "" + k: int = 10 + + @validator("unsecure") + def disable_ssl_warnings(cls, v: bool) -> bool: + """Disable SSL warnings.""" + if v: + # requests.urllib3.disable_warnings() + try: + import urllib3 + + urllib3.disable_warnings() + except ImportError as e: + print(e) + + return v + + @root_validator() + def validate_params(cls, values: Dict) -> Dict: + """Validate that custom searx params are merged with default ones.""" + user_params = values["params"] + default = _get_default_params() + values["params"] = {**default, **user_params} + + engines = values.get("engines") + if engines: + values["params"]["engines"] = ",".join(engines) + + searx_host = get_from_dict_or_env(values, "searx_host", "SEARX_HOST") + if not searx_host.startswith("http"): + print( + f"Warning: missing the url scheme on host \ + ! assuming secure https://{searx_host} " + ) + searx_host = "https://" + searx_host + elif searx_host.startswith("http://"): + values["unsecure"] = True + cls.disable_ssl_warnings(True) + values["searx_host"] = searx_host + + return values + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _searx_api_query(self, params: dict) -> SearxResults: + """Actual request to searx API.""" + raw_result = requests.get( + self.searx_host, + headers=self.headers, + params=params, + verify=not self.unsecure, + ) + # test if http result is ok + if not raw_result.ok: + raise ValueError("Searx API returned an error: ", raw_result.text) + res = SearxResults(raw_result.text) + self._result = res + return res + + def run( + self, + query: str, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, + ) -> str: + """Run query through Searx API and parse results. + + You can pass any other params to the searx query API. + + Args: + query: The query to search for. + query_suffix: Extra suffix appended to the query. + engines: List of engines to use for the query. + **kwargs: extra parameters to pass to the searx API. + + Example: + This will make a query to the qwant engine: + + .. 
code-block:: python + + from langchain.utilities import SearxSearchWrapper + searx = SearxSearchWrapper(searx_host="http://my.searx.host") + searx.run("what is the weather in France ?", engine="qwant") + + # the same result can be achieved using the `!` syntax of searx + # to select the engine using `query_suffix` + searx.run("what is the weather in France ?", query_suffix="!qwant") + """ + _params = { + "q": query, + } + params = {**self.params, **_params, **kwargs} + + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix + + if isinstance(engines, list) and len(engines) > 0: + params["engines"] = ",".join(engines) + + res = self._searx_api_query(params) + + if len(res.answers) > 0: + toret = res.answers[0] + + # only return the content of the results list + elif len(res.results) > 0: + toret = "\n\n".join([r.get("content", "") for r in res.results[: self.k]]) + else: + toret = "No good search result found" + + return toret + + def results( + self, + query: str, + num_results: int, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, + ) -> List[Dict]: + """Run query through Searx API and returns the results with metadata. + + Args: + query: The query to search for. + + query_suffix: Extra suffix appended to the query. + + num_results: Limit the number of results to return. + + engines: List of engines to use for the query. + + **kwargs: extra parameters to pass to the searx API. + + Returns: + Dict with the following keys: + + { + snippet: The description of the result. + + title: The title of the result. + + link: The link to the result. + + engines: The engines used for the result. + + category: Searx category of the result. + } + + + """ + _params = { + "q": query, + } + params = {**self.params, **_params, **kwargs} + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix + if isinstance(engines, list) and len(engines) > 0: + params["engines"] = ",".join(engines) + results = self._searx_api_query(params).results[:num_results] + if len(results) == 0: + return [{"Result": "No good Search Result was found"}] + + return [ + { + "snippet": result.get("content", ""), + "title": result["title"], + "link": result["url"], + "engines": result["engines"], + "category": result["category"], + } + for result in results + ] diff --git a/langchain/utilities/serpapi.py b/langchain/utilities/serpapi.py new file mode 100644 index 0000000000000000000000000000000000000000..3a35a711f4c68e512d7ab13eb9f08e7f5ab5e981 --- /dev/null +++ b/langchain/utilities/serpapi.py @@ -0,0 +1,152 @@ +"""Chain that calls SerpAPI. + +Heavily borrowed from https://github.com/ofirpress/self-ask +""" +import os +import sys +from typing import Any, Dict, Optional, Tuple + +import aiohttp +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.utils import get_from_dict_or_env + + +class HiddenPrints: + """Context manager to hide prints.""" + + def __enter__(self) -> None: + """Open file to pipe stdout to.""" + self._original_stdout = sys.stdout + sys.stdout = open(os.devnull, "w") + + def __exit__(self, *_: Any) -> None: + """Close file that stdout was piped to.""" + sys.stdout.close() + sys.stdout = self._original_stdout + + +class SerpAPIWrapper(BaseModel): + """Wrapper around SerpAPI. 
+ + To use, you should have the ``google-search-results`` python package installed, + and the environment variable ``SERPAPI_API_KEY`` set with your API key, or pass + `serpapi_api_key` as a named parameter to the constructor. + + Example: + .. code-block:: python + + from langchain import SerpAPIWrapper + serpapi = SerpAPIWrapper() + """ + + search_engine: Any #: :meta private: + params: dict = Field( + default={ + "engine": "google", + "google_domain": "google.com", + "gl": "us", + "hl": "en", + } + ) + serpapi_api_key: Optional[str] = None + aiosession: Optional[aiohttp.ClientSession] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + serpapi_api_key = get_from_dict_or_env( + values, "serpapi_api_key", "SERPAPI_API_KEY" + ) + values["serpapi_api_key"] = serpapi_api_key + try: + from serpapi import GoogleSearch + + values["search_engine"] = GoogleSearch + except ImportError: + raise ValueError( + "Could not import serpapi python package. " + "Please it install it with `pip install google-search-results`." + ) + return values + + async def arun(self, query: str) -> str: + """Use aiohttp to run query through SerpAPI and parse result.""" + + def construct_url_and_params() -> Tuple[str, Dict[str, str]]: + params = self.get_params(query) + params["source"] = "python" + if self.serpapi_api_key: + params["serp_api_key"] = self.serpapi_api_key + params["output"] = "json" + url = "https://serpapi.com/search" + return url, params + + url, params = construct_url_and_params() + if not self.aiosession: + async with aiohttp.ClientSession() as session: + async with session.get(url, params=params) as response: + res = await response.json() + else: + async with self.aiosession.get(url, params=params) as response: + res = await response.json() + + return self._process_response(res) + + def run(self, query: str) -> str: + """Run query through SerpAPI and parse result.""" + return self._process_response(self.results(query)) + + def results(self, query: str) -> dict: + """Run query through SerpAPI and return the raw result.""" + params = self.get_params(query) + with HiddenPrints(): + search = self.search_engine(params) + res = search.get_dict() + return res + + def get_params(self, query: str) -> Dict[str, str]: + """Get parameters for SerpAPI.""" + _params = { + "api_key": self.serpapi_api_key, + "q": query, + } + params = {**self.params, **_params} + return params + + @staticmethod + def _process_response(res: dict) -> str: + """Process response from SerpAPI.""" + if "error" in res.keys(): + raise ValueError(f"Got error from SerpAPI: {res['error']}") + if "answer_box" in res.keys() and "answer" in res["answer_box"].keys(): + toret = res["answer_box"]["answer"] + elif "answer_box" in res.keys() and "snippet" in res["answer_box"].keys(): + toret = res["answer_box"]["snippet"] + elif ( + "answer_box" in res.keys() + and "snippet_highlighted_words" in res["answer_box"].keys() + ): + toret = res["answer_box"]["snippet_highlighted_words"][0] + elif ( + "sports_results" in res.keys() + and "game_spotlight" in res["sports_results"].keys() + ): + toret = res["sports_results"]["game_spotlight"] + elif ( + "knowledge_graph" in res.keys() + and "description" in res["knowledge_graph"].keys() + ): + toret = res["knowledge_graph"]["description"] + elif "snippet" in 
res["organic_results"][0].keys(): + toret = res["organic_results"][0]["snippet"] + + else: + toret = "No good search result found" + return toret diff --git a/langchain/utilities/wikipedia.py b/langchain/utilities/wikipedia.py new file mode 100644 index 0000000000000000000000000000000000000000..2e2bd21ad37cda5b86c1df5dec5803b23a553b12 --- /dev/null +++ b/langchain/utilities/wikipedia.py @@ -0,0 +1,56 @@ +"""Util that calls Wikipedia.""" +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Extra, root_validator + + +class WikipediaAPIWrapper(BaseModel): + """Wrapper around WikipediaAPI. + + To use, you should have the ``wikipedia`` python package installed. + This wrapper will use the Wikipedia API to conduct searches and + fetch page summaries. By default, it will return the page summaries + of the top-k results of an input search. + """ + + wiki_client: Any #: :meta private: + top_k_results: int = 3 + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that the python package exists in environment.""" + try: + import wikipedia + + values["wiki_client"] = wikipedia + except ImportError: + raise ValueError( + "Could not import wikipedia python package. " + "Please it install it with `pip install wikipedia`." + ) + return values + + def run(self, query: str) -> str: + """Run Wikipedia search and get page summaries.""" + search_results = self.wiki_client.search(query) + summaries = [] + for i in range(min(self.top_k_results, len(search_results))): + summary = self.fetch_formatted_page_summary(search_results[i]) + if summary is not None: + summaries.append(summary) + return "\n\n".join(summaries) + + def fetch_formatted_page_summary(self, page: str) -> Optional[str]: + try: + wiki_page = self.wiki_client.page(title=page) + return f"Page: {page}\nSummary: {wiki_page.summary}" + except ( + self.wiki_client.exceptions.PageError, + self.wiki_client.exceptions.DisambiguationError, + ): + return None diff --git a/langchain/utilities/wolfram_alpha.py b/langchain/utilities/wolfram_alpha.py new file mode 100644 index 0000000000000000000000000000000000000000..a27aec051f40b1693ef60247f71143dcc93585b9 --- /dev/null +++ b/langchain/utilities/wolfram_alpha.py @@ -0,0 +1,64 @@ +"""Util that calls WolframAlpha.""" +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.utils import get_from_dict_or_env + + +class WolframAlphaAPIWrapper(BaseModel): + """Wrapper for Wolfram Alpha. + + Docs for using: + + 1. Go to wolfram alpha and sign up for a developer account + 2. Create an app and get your APP ID + 3. Save your APP ID into WOLFRAM_ALPHA_APPID env variable + 4. pip install wolframalpha + + """ + + wolfram_client: Any #: :meta private: + wolfram_alpha_appid: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + wolfram_alpha_appid = get_from_dict_or_env( + values, "wolfram_alpha_appid", "WOLFRAM_ALPHA_APPID" + ) + values["wolfram_alpha_appid"] = wolfram_alpha_appid + + try: + import wolframalpha + + except ImportError: + raise ImportError( + "wolframalpha is not installed. 
" + "Please install it with `pip install wolframalpha`" + ) + client = wolframalpha.Client(wolfram_alpha_appid) + values["wolfram_client"] = client + + return values + + def run(self, query: str) -> str: + """Run query through WolframAlpha and parse result.""" + res = self.wolfram_client.query(query) + + try: + assumption = next(res.pods).text + answer = next(res.results).text + except StopIteration: + return "Wolfram Alpha wasn't able to answer it" + + if answer is None or answer == "": + # We don't want to return the assumption alone if answer is empty + return "No good Wolfram Alpha Result was found" + else: + return f"Assumption: {assumption} \nAnswer: {answer}" diff --git a/langchain/utilities/zapier.py b/langchain/utilities/zapier.py new file mode 100644 index 0000000000000000000000000000000000000000..d2d3ed74e3a59c965c6707c3f54228b6c83d02a9 --- /dev/null +++ b/langchain/utilities/zapier.py @@ -0,0 +1,154 @@ +"""Util that can interact with Zapier NLA. + +Full docs here: https://nla.zapier.com/api/v1/docs + +Note: this wrapper currently only implemented the `api_key` auth method for testing +and server-side production use cases (using the developer's connected accounts on +Zapier.com) + +For use-cases where LangChain + Zapier NLA is powering a user-facing application, and +LangChain needs access to the end-user's connected accounts on Zapier.com, you'll need +to use oauth. Review the full docs above and reach out to nla@zapier.com for +developer support. +""" +import json +from typing import Dict, List, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator +from requests import Request, Session + +from langchain.utils import get_from_dict_or_env + + +class ZapierNLAWrapper(BaseModel): + """Wrapper for Zapier NLA. + + Full docs here: https://nla.zapier.com/api/v1/docs + + Note: this wrapper currently only implemented the `api_key` auth method for + testingand server-side production use cases (using the developer's connected + accounts on Zapier.com) + + For use-cases where LangChain + Zapier NLA is powering a user-facing application, + and LangChain needs access to the end-user's connected accounts on Zapier.com, + you'll need to use oauth. Review the full docs above and reach out to + nla@zapier.com for developer support. + """ + + zapier_nla_api_key: str + zapier_nla_api_base: str = "https://nla.zapier.com/api/v1/" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _get_session(self) -> Session: + session = requests.Session() + session.headers.update( + { + "Accept": "application/json", + "Content-Type": "application/json", + } + ) + session.params = {"api_key": self.zapier_nla_api_key} + return session + + def _get_action_request( + self, action_id: str, instructions: str, params: Optional[Dict] = None + ) -> Request: + data = params if params else {} + data.update( + { + "instructions": instructions, + } + ) + return Request( + "POST", + self.zapier_nla_api_base + f"exposed/{action_id}/execute/", + json=data, + ) + + @root_validator(pre=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + zapier_nla_api_key = get_from_dict_or_env( + values, "zapier_nla_api_key", "ZAPIER_NLA_API_KEY" + ) + values["zapier_nla_api_key"] = zapier_nla_api_key + + return values + + def list(self) -> List[Dict]: + """Returns a list of all exposed (enabled) actions associated with + current user (associated with the set api_key). 
Change your exposed + actions here: https://nla.zapier.com/demo/start/ + + The return list can be empty if no actions exposed. Else will contain + a list of action objects: + + [{ + "id": str, + "description": str, + "params": Dict[str, str] + }] + + `params` will always contain an `instructions` key, the only required + param. All others optional and if provided will override any AI guesses + (see "understanding the AI guessing flow" here: + https://nla.zapier.com/api/v1/docs) + """ + session = self._get_session() + response = session.get(self.zapier_nla_api_base + "exposed/") + response.raise_for_status() + return response.json()["results"] + + def run( + self, action_id: str, instructions: str, params: Optional[Dict] = None + ) -> Dict: + """Executes an action that is identified by action_id, must be exposed + (enabled) by the current user (associated with the set api_key). Change + your exposed actions here: https://nla.zapier.com/demo/start/ + + The return JSON is guaranteed to be less than ~500 words (350 + tokens) making it safe to inject into the prompt of another LLM + call. + """ + session = self._get_session() + request = self._get_action_request(action_id, instructions, params) + response = session.send(session.prepare_request(request)) + response.raise_for_status() + return response.json()["result"] + + def preview( + self, action_id: str, instructions: str, params: Optional[Dict] = None + ) -> Dict: + """Same as run, but instead of actually executing the action, will + instead return a preview of params that have been guessed by the AI in + case you need to explicitly review before executing.""" + session = self._get_session() + params = params if params else {} + params.update({"preview_only": True}) + request = self._get_action_request(action_id, instructions, params) + response = session.send(session.prepare_request(request)) + response.raise_for_status() + return response.json()["input_params"] + + def run_as_str(self, *args, **kwargs) -> str: # type: ignore[no-untyped-def] + """Same as run, but returns a stringified version of the JSON for + insertting back into an LLM.""" + data = self.run(*args, **kwargs) + return json.dumps(data) + + def preview_as_str(self, *args, **kwargs) -> str: # type: ignore[no-untyped-def] + """Same as preview, but returns a stringified version of the JSON for + insertting back into an LLM.""" + data = self.preview(*args, **kwargs) + return json.dumps(data) + + def list_as_str(self) -> str: # type: ignore[no-untyped-def] + """Same as list, but returns a stringified version of the JSON for + insertting back into an LLM.""" + actions = self.list() + return json.dumps(actions) diff --git a/langchain/utils.py b/langchain/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..08fa4327904ed53847d8cde1bff0f53c2a5fb482 --- /dev/null +++ b/langchain/utils.py @@ -0,0 +1,21 @@ +"""Generic utility functions.""" +import os +from typing import Any, Dict, Optional + + +def get_from_dict_or_env( + data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None +) -> str: + """Get a value from a dictionary or an environment variable.""" + if key in data and data[key]: + return data[key] + elif env_key in os.environ and os.environ[env_key]: + return os.environ[env_key] + elif default is not None: + return default + else: + raise ValueError( + f"Did not find {key}, please add an environment variable" + f" `{env_key}` which contains it, or pass" + f" `{key}` as a named parameter." 
+ ) diff --git a/langchain/vectorstores/__init__.py b/langchain/vectorstores/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4f86ab6e1b7c793bee5bf9df5edb1844b3f55363 --- /dev/null +++ b/langchain/vectorstores/__init__.py @@ -0,0 +1,26 @@ +"""Wrappers on top of vector stores.""" +from langchain.vectorstores.atlas import AtlasDB +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.chroma import Chroma +from langchain.vectorstores.deeplake import DeepLake +from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch +from langchain.vectorstores.faiss import FAISS +from langchain.vectorstores.milvus import Milvus +from langchain.vectorstores.opensearch_vector_search import OpenSearchVectorSearch +from langchain.vectorstores.pinecone import Pinecone +from langchain.vectorstores.qdrant import Qdrant +from langchain.vectorstores.weaviate import Weaviate + +__all__ = [ + "ElasticVectorSearch", + "FAISS", + "VectorStore", + "Pinecone", + "Weaviate", + "Qdrant", + "Milvus", + "Chroma", + "OpenSearchVectorSearch", + "AtlasDB", + "DeepLake", +] diff --git a/langchain/vectorstores/__pycache__/__init__.cpython-39.pyc b/langchain/vectorstores/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..611ba530cc6780de833bb3980bebdd14d355ffec Binary files /dev/null and b/langchain/vectorstores/__pycache__/__init__.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/atlas.cpython-39.pyc b/langchain/vectorstores/__pycache__/atlas.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7f0422dda493a41b10b03cef84ff7b7519de456 Binary files /dev/null and b/langchain/vectorstores/__pycache__/atlas.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/base.cpython-39.pyc b/langchain/vectorstores/__pycache__/base.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10a74c3bb311b5e699205957fe6131c47c803b6c Binary files /dev/null and b/langchain/vectorstores/__pycache__/base.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/chroma.cpython-39.pyc b/langchain/vectorstores/__pycache__/chroma.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..805db9ed05c45844f0b3953728565ec5bcd1c1db Binary files /dev/null and b/langchain/vectorstores/__pycache__/chroma.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/deeplake.cpython-39.pyc b/langchain/vectorstores/__pycache__/deeplake.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45597ff0fb986867808d5f497a439490466e3e6f Binary files /dev/null and b/langchain/vectorstores/__pycache__/deeplake.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/elastic_vector_search.cpython-39.pyc b/langchain/vectorstores/__pycache__/elastic_vector_search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f383a6327127244c6add046473a4da464d26218 Binary files /dev/null and b/langchain/vectorstores/__pycache__/elastic_vector_search.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/faiss.cpython-39.pyc b/langchain/vectorstores/__pycache__/faiss.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62b2bf70e8b2472b83b442f390ac5718a85ac1b5 Binary files /dev/null and b/langchain/vectorstores/__pycache__/faiss.cpython-39.pyc differ diff --git 
a/langchain/vectorstores/__pycache__/milvus.cpython-39.pyc b/langchain/vectorstores/__pycache__/milvus.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85cd917ef14facecdda79516a3b7b6fb92b14033 Binary files /dev/null and b/langchain/vectorstores/__pycache__/milvus.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/opensearch_vector_search.cpython-39.pyc b/langchain/vectorstores/__pycache__/opensearch_vector_search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93064183c02088ccdd4d095620dbcc76eead2057 Binary files /dev/null and b/langchain/vectorstores/__pycache__/opensearch_vector_search.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/pinecone.cpython-39.pyc b/langchain/vectorstores/__pycache__/pinecone.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a472fb9036e3b09868b8c7fb38da3af8441c95f Binary files /dev/null and b/langchain/vectorstores/__pycache__/pinecone.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/qdrant.cpython-39.pyc b/langchain/vectorstores/__pycache__/qdrant.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08db7bfc9c1f1222776ba71c270127a4a85a79b6 Binary files /dev/null and b/langchain/vectorstores/__pycache__/qdrant.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/utils.cpython-39.pyc b/langchain/vectorstores/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0848519bcc905767fcc8e44ca155d6b6c290cd5d Binary files /dev/null and b/langchain/vectorstores/__pycache__/utils.cpython-39.pyc differ diff --git a/langchain/vectorstores/__pycache__/weaviate.cpython-39.pyc b/langchain/vectorstores/__pycache__/weaviate.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d3b16664b00f3a5b2f80b51cacb55f501f570ad Binary files /dev/null and b/langchain/vectorstores/__pycache__/weaviate.cpython-39.pyc differ diff --git a/langchain/vectorstores/atlas.py b/langchain/vectorstores/atlas.py new file mode 100644 index 0000000000000000000000000000000000000000..ce2410b593b3f62fb26815a33f25e3eb7fc460e9 --- /dev/null +++ b/langchain/vectorstores/atlas.py @@ -0,0 +1,322 @@ +"""Wrapper around Atlas by Nomic.""" +from __future__ import annotations + +import logging +import uuid +from typing import Any, Iterable, List, Optional + +import numpy as np + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore + +logger = logging.getLogger() + + +class AtlasDB(VectorStore): + """Wrapper around Atlas: Nomic's neural database and rhizomatic instrument. + + To use, you should have the ``nomic`` python package installed. + + Example: + .. code-block:: python + + from langchain.vectorstores import AtlasDB + from langchain.embeddings.openai import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = AtlasDB("my_project", embeddings.embed_query) + """ + + _ATLAS_DEFAULT_ID_FIELD = "atlas_id" + + def __init__( + self, + name: str, + embedding_function: Optional[Embeddings] = None, + api_key: Optional[str] = None, + description: str = "A description for your project", + is_public: bool = True, + reset_project_if_exists: bool = False, + ) -> None: + """ + Initialize the Atlas Client + + Args: + name (str): The name of your project. If the project already exists, + it will be loaded. 
+ embedding_function (Optional[Callable]): An optional function used for + embedding your data. If None, data will be embedded with + Nomic's embed model. + api_key (str): Your nomic API key + description (str): A description for your project. + is_public (bool): Whether your project is publicly accessible. + True by default. + reset_project_if_exists (bool): Whether to reset this project if it + already exists. Default False. + Generally useful during development and testing. + """ + try: + import nomic + from nomic import AtlasProject + except ImportError: + raise ValueError( + "Could not import nomic python package. " + "Please install it with `pip install nomic`." + ) + + if api_key is None: + raise ValueError("No API key provided. Sign up at atlas.nomic.ai!") + nomic.login(api_key) + + self._embedding_function = embedding_function + modality = "text" + if self._embedding_function is not None: + modality = "embedding" + + # Check if the project exists, create it if not + self.project = AtlasProject( + name=name, + description=description, + modality=modality, + is_public=is_public, + reset_project_if_exists=reset_project_if_exists, + unique_id_field=AtlasDB._ATLAS_DEFAULT_ID_FIELD, + ) + self.project._latest_project_state() + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + refresh: bool = True, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts (Iterable[str]): Texts to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + ids (Optional[List[str]]): An optional list of ids. + refresh(bool): Whether or not to refresh indices with the updated data. + Default True. + Returns: + List[str]: List of IDs of the added texts. + """ + + if ( + metadatas is not None + and len(metadatas) > 0 + and "text" in metadatas[0].keys() + ): + raise ValueError("Cannot accept key text in metadata!") + + texts = list(texts) + if ids is None: + ids = [str(uuid.uuid1()) for _ in texts] + + # Embedding upload case + if self._embedding_function is not None: + _embeddings = self._embedding_function.embed_documents(texts) + embeddings = np.stack(_embeddings) + if metadatas is None: + data = [ + {AtlasDB._ATLAS_DEFAULT_ID_FIELD: ids[i], "text": texts[i]} + for i, _ in enumerate(texts) + ] + else: + for i in range(len(metadatas)): + metadatas[i][AtlasDB._ATLAS_DEFAULT_ID_FIELD] = ids[i] + metadatas[i]["text"] = texts[i] + data = metadatas + + self.project._validate_map_data_inputs( + [], id_field=AtlasDB._ATLAS_DEFAULT_ID_FIELD, data=data + ) + with self.project.wait_for_project_lock(): + self.project.add_embeddings(embeddings=embeddings, data=data) + # Text upload case + else: + if metadatas is None: + data = [ + {"text": text, AtlasDB._ATLAS_DEFAULT_ID_FIELD: ids[i]} + for i, text in enumerate(texts) + ] + else: + for i, text in enumerate(texts): + metadatas[i]["text"] = text + metadatas[i][AtlasDB._ATLAS_DEFAULT_ID_FIELD] = ids[i] + data = metadatas + + self.project._validate_map_data_inputs( + [], id_field=AtlasDB._ATLAS_DEFAULT_ID_FIELD, data=data + ) + + with self.project.wait_for_project_lock(): + self.project.add_text(data) + + if refresh: + if len(self.project.indices) > 0: + with self.project.wait_for_project_lock(): + self.project.rebuild_maps() + + return ids + + def create_index(self, **kwargs: Any) -> Any: + """Creates an index in your project.
+ + See + https://docs.nomic.ai/atlas_api.html#nomic.project.AtlasProject.create_index + for full detail. + """ + with self.project.wait_for_project_lock(): + return self.project.create_index(**kwargs) + + def similarity_search( + self, + query: str, + k: int = 4, + **kwargs: Any, + ) -> List[Document]: + """Run similarity search with AtlasDB + + Args: + query (str): Query text to search for. + k (int): Number of results to return. Defaults to 4. + + Returns: + List[Document]: List of documents most similar to the query text. + """ + if self._embedding_function is None: + raise NotImplementedError( + "AtlasDB requires an embedding_function for text similarity search!" + ) + + _embedding = self._embedding_function.embed_documents([query])[0] + embedding = np.array(_embedding).reshape(1, -1) + with self.project.wait_for_project_lock(): + neighbors, _ = self.project.projections[0].vector_search( + queries=embedding, k=k + ) + datas = self.project.get_data(ids=neighbors[0]) + + docs = [ + Document(page_content=datas[i]["text"], metadata=datas[i]) + for i, neighbor in enumerate(neighbors) + ] + return docs + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Optional[Embeddings] = None, + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + name: Optional[str] = None, + api_key: Optional[str] = None, + description: str = "A description for your project", + is_public: bool = True, + reset_project_if_exists: bool = False, + index_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> AtlasDB: + """Create an AtlasDB vectorstore from a raw documents. + + Args: + texts (List[str]): The list of texts to ingest. + name (str): Name of the project to create. + api_key (str): Your nomic API key, + embedding (Optional[Embeddings]): Embedding function. Defaults to None. + metadatas (Optional[List[dict]]): List of metadatas. Defaults to None. + ids (Optional[List[str]]): Optional list of document IDs. If None, + ids will be auto created + description (str): A description for your project. + is_public (bool): Whether your project is publicly accessible. + True by default. + reset_project_if_exists (bool): Whether to reset this project if it + already exists. Default False. + Generally userful during development and testing. + index_kwargs (Optional[dict]): Dict of kwargs for index creation. 
+ See https://docs.nomic.ai/atlas_api.html + + Returns: + AtlasDB: Nomic's neural database and finest rhizomatic instrument + """ + if name is None or api_key is None: + raise ValueError("`name` and `api_key` cannot be None.") + + # Inject relevant kwargs + all_index_kwargs = {"name": name + "_index", "indexed_field": "text"} + if index_kwargs is not None: + for k, v in index_kwargs.items(): + all_index_kwargs[k] = v + + # Build project + atlasDB = cls( + name, + embedding_function=embedding, + api_key=api_key, + description="A description for your project", + is_public=is_public, + reset_project_if_exists=reset_project_if_exists, + ) + with atlasDB.project.wait_for_project_lock(): + atlasDB.add_texts(texts=texts, metadatas=metadatas, ids=ids) + atlasDB.create_index(**all_index_kwargs) + return atlasDB + + @classmethod + def from_documents( + cls, + documents: List[Document], + embedding: Optional[Embeddings] = None, + ids: Optional[List[str]] = None, + name: Optional[str] = None, + api_key: Optional[str] = None, + persist_directory: Optional[str] = None, + description: str = "A description for your project", + is_public: bool = True, + reset_project_if_exists: bool = False, + index_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> AtlasDB: + """Create an AtlasDB vectorstore from a list of documents. + + Args: + name (str): Name of the collection to create. + api_key (str): Your nomic API key, + documents (List[Document]): List of documents to add to the vectorstore. + embedding (Optional[Embeddings]): Embedding function. Defaults to None. + ids (Optional[List[str]]): Optional list of document IDs. If None, + ids will be auto created + description (str): A description for your project. + is_public (bool): Whether your project is publicly accessible. + True by default. + reset_project_if_exists (bool): Whether to reset this project if + it already exists. Default False. + Generally userful during development and testing. + index_kwargs (Optional[dict]): Dict of kwargs for index creation. + See https://docs.nomic.ai/atlas_api.html + + Returns: + AtlasDB: Nomic's neural database and finest rhizomatic instrument + """ + if name is None or api_key is None: + raise ValueError("`name` and `api_key` cannot be None.") + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return cls.from_texts( + name=name, + api_key=api_key, + texts=texts, + embedding=embedding, + metadatas=metadatas, + ids=ids, + description=description, + is_public=is_public, + reset_project_if_exists=reset_project_if_exists, + index_kwargs=index_kwargs, + ) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py new file mode 100644 index 0000000000000000000000000000000000000000..60f4d1e046935fa11750034d8f0015a1319b8e9e --- /dev/null +++ b/langchain/vectorstores/base.py @@ -0,0 +1,143 @@ +"""Interface for vector stores.""" +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Iterable, List, Optional + +from pydantic import BaseModel, Field + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.schema import BaseRetriever + + +class VectorStore(ABC): + """Interface for vector stores.""" + + @abstractmethod + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. 
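A minimal usage sketch of the `AtlasDB` wrapper added in `langchain/vectorstores/atlas.py` above. The project name, the `NOMIC_API_KEY` environment variable, and the sample texts are illustrative assumptions, not part of the patch; `similarity_search` only works when an `embedding_function` is supplied, as the class itself notes.

```python
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import AtlasDB

# Hypothetical inputs; any list of strings works here.
texts = [
    "LangChain composes LLMs with other sources of computation.",
    "Atlas stores embeddings and maps them for exploration.",
]

db = AtlasDB.from_texts(
    texts=texts,
    embedding=OpenAIEmbeddings(),         # optional; omit to embed with Nomic's own model
    name="langchain_demo_project",        # assumed project name
    api_key=os.environ["NOMIC_API_KEY"],  # assumed env var holding your Nomic key
    description="Demo project for the AtlasDB wrapper",
)

# Text similarity search requires the embedding function passed above.
docs = db.similarity_search("How does LangChain use embeddings?", k=2)
print([d.page_content for d in docs])
```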
+ + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + + def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: + """Run more documents through the embeddings and add to the vectorstore. + + Args: + documents (List[Document]: Documents to add to the vectorstore. + + + Returns: + List[str]: List of IDs of the added texts. + """ + # TODO: Handle the case where the user doesn't provide ids on the Collection + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return self.add_texts(texts, metadatas, **kwargs) + + @abstractmethod + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query.""" + + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query vector. + """ + raise NotImplementedError + + def max_marginal_relevance_search( + self, query: str, k: int = 4, fetch_k: int = 20 + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + raise NotImplementedError + + def max_marginal_relevance_search_by_vector( + self, embedding: List[float], k: int = 4, fetch_k: int = 20 + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + + Returns: + List of Documents selected by maximal marginal relevance. 
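The base interface above only declares the two `max_marginal_relevance_search` entry points; the actual scoring lives in `langchain.vectorstores.utils.maximal_marginal_relevance`, which is not part of this hunk. As a rough sketch of the idea only (greedy selection that trades query relevance against redundancy with an assumed weighting factor `lambda_mult`), not the library's exact implementation:

```python
import numpy as np


def mmr_sketch(query_vec, doc_vecs, k=4, lambda_mult=0.5):
    """Greedy MMR: prefer docs similar to the query but dissimilar to already-picked docs."""

    def cosine(a, b):
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10))

    selected = []
    candidates = list(range(len(doc_vecs)))
    while candidates and len(selected) < k:
        best, best_score = None, -np.inf
        for i in candidates:
            relevance = cosine(query_vec, doc_vecs[i])
            redundancy = max((cosine(doc_vecs[i], doc_vecs[j]) for j in selected), default=0.0)
            score = lambda_mult * relevance - (1 - lambda_mult) * redundancy
            if score > best_score:
                best, best_score = i, score
        selected.append(best)
        candidates.remove(best)
    return selected
```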
+ """ + raise NotImplementedError + + @classmethod + def from_documents( + cls, + documents: List[Document], + embedding: Embeddings, + **kwargs: Any, + ) -> VectorStore: + """Return VectorStore initialized from documents and embeddings.""" + texts = [d.page_content for d in documents] + metadatas = [d.metadata for d in documents] + return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs) + + @classmethod + @abstractmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> VectorStore: + """Return VectorStore initialized from texts and embeddings.""" + + def as_retriever(self) -> VectorStoreRetriever: + return VectorStoreRetriever(vectorstore=self) + + +class VectorStoreRetriever(BaseRetriever, BaseModel): + vectorstore: VectorStore + search_kwargs: dict = Field(default_factory=dict) + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + def get_relevant_texts(self, query: str) -> List[Document]: + return self.vectorstore.similarity_search(query, **self.search_kwargs) diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py new file mode 100644 index 0000000000000000000000000000000000000000..2a0db5898fcdc453891720fcb8b7f27dad46414e --- /dev/null +++ b/langchain/vectorstores/chroma.py @@ -0,0 +1,276 @@ +"""Wrapper around ChromaDB embeddings platform.""" +from __future__ import annotations + +import logging +import uuid +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore + +if TYPE_CHECKING: + import chromadb + import chromadb.config + +logger = logging.getLogger() + + +def _results_to_docs(results: Any) -> List[Document]: + return [doc for doc, _ in _results_to_docs_and_scores(results)] + + +def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]: + return [ + # TODO: Chroma can do batch querying, + # we shouldn't hard code to the 1st result + (Document(page_content=result[0], metadata=result[1] or {}), result[2]) + for result in zip( + results["documents"][0], + results["metadatas"][0], + results["distances"][0], + ) + ] + + +class Chroma(VectorStore): + """Wrapper around ChromaDB embeddings platform. + + To use, you should have the ``chromadb`` python package installed. + + Example: + .. code-block:: python + + from langchain.vectorstores import Chroma + from langchain.embeddings.openai import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = Chroma("langchain_store", embeddings.embed_query) + """ + + _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" + + def __init__( + self, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + embedding_function: Optional[Embeddings] = None, + persist_directory: Optional[str] = None, + client_settings: Optional[chromadb.config.Settings] = None, + ) -> None: + """Initialize with Chroma client.""" + try: + import chromadb + import chromadb.config + except ImportError: + raise ValueError( + "Could not import chromadb python package. " + "Please install it with `pip install chromadb`." 
+ ) + + if client_settings: + self._client_settings = client_settings + else: + self._client_settings = chromadb.config.Settings() + if persist_directory is not None: + self._client_settings = chromadb.config.Settings( + chroma_db_impl="duckdb+parquet", persist_directory=persist_directory + ) + self._client = chromadb.Client(self._client_settings) + self._embedding_function = embedding_function + self._persist_directory = persist_directory + self._collection = self._client.get_or_create_collection( + name=collection_name, + embedding_function=self._embedding_function.embed_documents + if self._embedding_function is not None + else None, + ) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts (Iterable[str]): Texts to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + ids (Optional[List[str]], optional): Optional list of IDs. + + Returns: + List[str]: List of IDs of the added texts. + """ + # TODO: Handle the case where the user doesn't provide ids on the Collection + if ids is None: + ids = [str(uuid.uuid1()) for _ in texts] + embeddings = None + if self._embedding_function is not None: + embeddings = self._embedding_function.embed_documents(list(texts)) + self._collection.add( + metadatas=metadatas, embeddings=embeddings, documents=texts, ids=ids + ) + return ids + + def similarity_search( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> List[Document]: + """Run similarity search with Chroma. + + Args: + query (str): Query text to search for. + k (int): Number of results to return. Defaults to 4. + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. + + Returns: + List[Document]: List of documents most simmilar to the query text. + """ + docs_and_scores = self.similarity_search_with_score(query, k, where=filter) + return [doc for doc, _ in docs_and_scores] + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to embedding vector. + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + Returns: + List of Documents most similar to the query vector. + """ + results = self._collection.query( + query_embeddings=embedding, n_results=k, where=filter + ) + return _results_to_docs(results) + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Run similarity search with Chroma with distance. + + Args: + query (str): Query text to search for. + k (int): Number of results to return. Defaults to 4. + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. + + Returns: + List[Tuple[Document, float]]: List of documents most similar to the query + text with distance in float. 
+ """ + if self._embedding_function is None: + results = self._collection.query( + query_texts=[query], n_results=k, where=filter + ) + else: + query_embedding = self._embedding_function.embed_query(query) + results = self._collection.query( + query_embeddings=[query_embedding], n_results=k, where=filter + ) + + return _results_to_docs_and_scores(results) + + def delete_collection(self) -> None: + """Delete the collection.""" + self._client.delete_collection(self._collection.name) + + def persist(self) -> None: + """Persist the collection. + + This can be used to explicitly persist the data to disk. + It will also be called automatically when the object is destroyed. + """ + if self._persist_directory is None: + raise ValueError( + "You must specify a persist_directory on" + "creation to persist the collection." + ) + self._client.persist() + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Optional[Embeddings] = None, + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + persist_directory: Optional[str] = None, + client_settings: Optional[chromadb.config.Settings] = None, + **kwargs: Any, + ) -> Chroma: + """Create a Chroma vectorstore from a raw documents. + + If a persist_directory is specified, the collection will be persisted there. + Otherwise, the data will be ephemeral in-memory. + + Args: + texts (List[str]): List of texts to add to the collection. + collection_name (str): Name of the collection to create. + persist_directory (Optional[str]): Directory to persist the collection. + embedding (Optional[Embeddings]): Embedding function. Defaults to None. + metadatas (Optional[List[dict]]): List of metadatas. Defaults to None. + ids (Optional[List[str]]): List of document IDs. Defaults to None. + client_settings (Optional[chromadb.config.Settings]): Chroma client settings + + Returns: + Chroma: Chroma vectorstore. + """ + chroma_collection = cls( + collection_name=collection_name, + embedding_function=embedding, + persist_directory=persist_directory, + client_settings=client_settings, + ) + chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids) + return chroma_collection + + @classmethod + def from_documents( + cls, + documents: List[Document], + embedding: Optional[Embeddings] = None, + ids: Optional[List[str]] = None, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + persist_directory: Optional[str] = None, + client_settings: Optional[chromadb.config.Settings] = None, + **kwargs: Any, + ) -> Chroma: + """Create a Chroma vectorstore from a list of documents. + + If a persist_directory is specified, the collection will be persisted there. + Otherwise, the data will be ephemeral in-memory. + + Args: + collection_name (str): Name of the collection to create. + persist_directory (Optional[str]): Directory to persist the collection. + ids (Optional[List[str]]): List of document IDs. Defaults to None. + documents (List[Document]): List of documents to add to the vectorstore. + embedding (Optional[Embeddings]): Embedding function. Defaults to None. + client_settings (Optional[chromadb.config.Settings]): Chroma client settings + Returns: + Chroma: Chroma vectorstore. 
+ """ + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return cls.from_texts( + texts=texts, + embedding=embedding, + metadatas=metadatas, + ids=ids, + collection_name=collection_name, + persist_directory=persist_directory, + client_settings=client_settings, + ) diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py new file mode 100644 index 0000000000000000000000000000000000000000..67865bc61134bd1b0b9da8fd7d851e8c3bdebdad --- /dev/null +++ b/langchain/vectorstores/deeplake.py @@ -0,0 +1,211 @@ +"""Wrapper around Activeloop Deep Lake.""" +from __future__ import annotations + +import logging +import uuid +from typing import Any, Iterable, List, Optional, Sequence + +import numpy as np + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore + +logger = logging.getLogger() + + +def L2_search( + query_embedding: np.ndarray, data_vectors: np.ndarray, k: int = 4 +) -> list: + """naive L2 search for nearest neighbors""" + # Calculate the L2 distance between the query_vector and all data_vectors + distances = np.linalg.norm(data_vectors - query_embedding, axis=1) + + # Sort the distances and return the indices of the k nearest vectors + nearest_indices = np.argsort(distances)[:k] + return nearest_indices.tolist() + + +class DeepLake(VectorStore): + """Wrapper around Deep Lake, a data lake for deep learning applications. + + It not only stores embeddings, but also the original data and queries with + version control automatically enabled. + + It is more than just a vector store. You can use the dataset to fine-tune + your own LLM models or use it for other downstream tasks. + + We implement naive similiarity search, but it can be extended with Tensor + Query Language (TQL for production use cases) over billion rows. + + To use, you should have the ``deeplake`` python package installed. + + Example: + .. code-block:: python + + from langchain.vectorstores import DeepLake + from langchain.embeddings.openai import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = DeepLake("langchain_store", embeddings.embed_query) + """ + + _LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "mem://langchain" + + def __init__( + self, + dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH, + token: Optional[str] = None, + embedding_function: Optional[Embeddings] = None, + ) -> None: + """Initialize with Deep Lake client.""" + + try: + import deeplake + except ImportError: + raise ValueError( + "Could not import deeplake python package. " + "Please install it with `pip install deeplake`." + ) + self._deeplake = deeplake + + if deeplake.exists(dataset_path, token=token): + self.ds = deeplake.load(dataset_path, token=token) + logger.warning( + f"Deep Lake Dataset in {dataset_path} already exists, " + f"loading from the storage" + ) + self.ds.summary() + else: + self.ds = deeplake.empty(dataset_path, token=token, overwrite=True) + with self.ds: + self.ds.create_tensor("text", htype="text") + self.ds.create_tensor("metadata", htype="json") + self.ds.create_tensor("embedding", htype="generic") + self.ds.create_tensor("ids", htype="text") + + self._embedding_function = embedding_function + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. 
+ + Args: + texts (Iterable[str]): Texts to add to the vectorstore. + metadatas (Optional[List[dict]], optional): Optional list of metadatas. + ids (Optional[List[str]], optional): Optional list of IDs. + + Returns: + List[str]: List of IDs of the added texts. + """ + + if ids is None: + ids = [str(uuid.uuid1()) for _ in texts] + + text_list = list(texts) + + if self._embedding_function is None: + embeddings: Sequence[Optional[List[float]]] = [None] * len(text_list) + else: + embeddings = self._embedding_function.embed_documents(text_list) + + if metadatas is None: + metadatas_to_use: Sequence[Optional[dict]] = [None] * len(text_list) + else: + metadatas_to_use = metadatas + + elements = zip(text_list, embeddings, metadatas_to_use, ids) + + @self._deeplake.compute + def ingest(sample_in: list, sample_out: list) -> None: + s = { + "text": sample_in[0], + "embedding": sample_in[1], + "metadata": sample_in[2], + "ids": sample_in[3], + } + sample_out.append(s) + + ingest().eval(list(elements), self.ds) + self.ds.commit() + + return ids + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query.""" + if self._embedding_function is None: + self.ds.summary() + ds_view = self.ds.filter(lambda x: query in x["text"].data()["value"]) + else: + query_emb = np.array(self._embedding_function.embed_query(query)) + embeddings = self.ds.embedding.numpy() + indices = L2_search(query_emb, embeddings, k=k) + ds_view = self.ds[indices] + + docs = [ + Document( + page_content=el["text"].data()["value"], + metadata=el["metadata"].data()["value"], + ) + for el in ds_view + ] + return docs + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Optional[Embeddings] = None, + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH, + **kwargs: Any, + ) -> DeepLake: + """Create a Deep Lake dataset from a raw documents. + + If a persist_directory is specified, the collection will be persisted there. + Otherwise, the data will be ephemeral in-memory. + + Args: + path (str, pathlib.Path): - The full path to the dataset. Can be: + - a Deep Lake cloud path of the form ``hub://username/datasetname``. + To write to Deep Lake cloud datasets, + ensure that you are logged in to Deep Lake + (use 'activeloop login' from command line) + - an s3 path of the form ``s3://bucketname/path/to/dataset``. + Credentials are required in either the environment or + passed to the creds argument. + - a local file system path of the form ``./path/to/dataset`` or + ``~/path/to/dataset`` or ``path/to/dataset``. + - a memory path of the form ``mem://path/to/dataset`` which doesn't + save the dataset but keeps it in memory instead. + Should be used only for testing as it does not persist. + documents (List[Document]): List of documents to add. + embedding (Optional[Embeddings]): Embedding function. Defaults to None. + metadatas (Optional[List[dict]]): List of metadatas. Defaults to None. + ids (Optional[List[str]]): List of document IDs. Defaults to None. + + Returns: + DeepLake: Deep Lake dataset. 
+ """ + deeplake_dataset = cls( + dataset_path=dataset_path, + embedding_function=embedding, + ) + deeplake_dataset.add_texts(texts=texts, metadatas=metadatas, ids=ids) + return deeplake_dataset + + def delete_dataset(self) -> None: + """Delete the collection.""" + self.ds.delete() + + def persist(self) -> None: + """Persist the collection.""" + self.ds.flush() diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py new file mode 100644 index 0000000000000000000000000000000000000000..57a73dcdd6200cfa632b166448535b57c794b563 --- /dev/null +++ b/langchain/vectorstores/elastic_vector_search.py @@ -0,0 +1,198 @@ +"""Wrapper around Elasticsearch vector database.""" +from __future__ import annotations + +import uuid +from typing import Any, Dict, Iterable, List, Optional + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env +from langchain.vectorstores.base import VectorStore + + +def _default_text_mapping(dim: int) -> Dict: + return { + "properties": { + "text": {"type": "text"}, + "vector": {"type": "dense_vector", "dims": dim}, + } + } + + +def _default_script_query(query_vector: List[float]) -> Dict: + return { + "script_score": { + "query": {"match_all": {}}, + "script": { + "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0", + "params": {"query_vector": query_vector}, + }, + } + } + + +class ElasticVectorSearch(VectorStore): + """Wrapper around Elasticsearch as a vector database. + + Example: + .. code-block:: python + + from langchain import ElasticVectorSearch + elastic_vector_search = ElasticVectorSearch( + "http://localhost:9200", + "embeddings", + embedding + ) + + """ + + def __init__(self, elasticsearch_url: str, index_name: str, embedding: Embeddings): + """Initialize with necessary components.""" + try: + import elasticsearch + except ImportError: + raise ValueError( + "Could not import elasticsearch python package. " + "Please install it with `pip install elasticsearch`." + ) + self.embedding = embedding + self.index_name = index_name + try: + es_client = elasticsearch.Elasticsearch(elasticsearch_url) # noqa + except ValueError as e: + raise ValueError( + f"Your elasticsearch client string is misformatted. Got error: {e} " + ) + self.client = es_client + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ + try: + from elasticsearch.helpers import bulk + except ImportError: + raise ValueError( + "Could not import elasticsearch python package. " + "Please install it with `pip install elasticsearch`." 
+ ) + requests = [] + ids = [] + embeddings = self.embedding.embed_documents(list(texts)) + for i, text in enumerate(texts): + metadata = metadatas[i] if metadatas else {} + _id = str(uuid.uuid4()) + request = { + "_op_type": "index", + "_index": self.index_name, + "vector": embeddings[i], + "text": text, + "metadata": metadata, + "_id": _id, + } + ids.append(_id) + requests.append(request) + bulk(self.client, requests) + # TODO: add option not to refresh + self.client.indices.refresh(index=self.index_name) + return ids + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query. + """ + embedding = self.embedding.embed_query(query) + script_query = _default_script_query(embedding) + response = self.client.search(index=self.index_name, query=script_query) + hits = [hit["_source"] for hit in response["hits"]["hits"][:k]] + documents = [ + Document(page_content=hit["text"], metadata=hit["metadata"]) for hit in hits + ] + return documents + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> ElasticVectorSearch: + """Construct ElasticVectorSearch wrapper from raw documents. + + This is a user-friendly interface that: + 1. Embeds documents. + 2. Creates a new index for the embeddings in the Elasticsearch instance. + 3. Adds the documents to the newly created Elasticsearch index. + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain import ElasticVectorSearch + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + elastic_vector_search = ElasticVectorSearch.from_texts( + texts, + embeddings, + elasticsearch_url="http://localhost:9200" + ) + """ + elasticsearch_url = get_from_dict_or_env( + kwargs, "elasticsearch_url", "ELASTICSEARCH_URL" + ) + try: + import elasticsearch + from elasticsearch.helpers import bulk + except ImportError: + raise ValueError( + "Could not import elasticsearch python package. " + "Please install it with `pip install elasticearch`." + ) + try: + client = elasticsearch.Elasticsearch(elasticsearch_url) + except ValueError as e: + raise ValueError( + "Your elasticsearch client string is misformatted. 
" f"Got error: {e} " + ) + index_name = uuid.uuid4().hex + embeddings = embedding.embed_documents(texts) + dim = len(embeddings[0]) + mapping = _default_text_mapping(dim) + # TODO would be nice to create index before embedding, + # just to save expensive steps for last + client.indices.create(index=index_name, mappings=mapping) + requests = [] + for i, text in enumerate(texts): + metadata = metadatas[i] if metadatas else {} + request = { + "_op_type": "index", + "_index": index_name, + "vector": embeddings[i], + "text": text, + "metadata": metadata, + } + requests.append(request) + bulk(client, requests) + client.indices.refresh(index=index_name) + return cls(elasticsearch_url, index_name, embedding) diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py new file mode 100644 index 0000000000000000000000000000000000000000..9b139807a51d627eef57cf70dfeccd5e71f6dfab --- /dev/null +++ b/langchain/vectorstores/faiss.py @@ -0,0 +1,411 @@ +"""Wrapper around FAISS vector database.""" +from __future__ import annotations + +import pickle +import uuid +from pathlib import Path +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple + +import numpy as np + +from langchain.docstore.base import AddableMixin, Docstore +from langchain.docstore.document import Document +from langchain.docstore.in_memory import InMemoryDocstore +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.utils import maximal_marginal_relevance + + +def dependable_faiss_import() -> Any: + """Import faiss if available, otherwise raise error.""" + try: + import faiss + except ImportError: + raise ValueError( + "Could not import faiss python package. " + "Please install it with `pip install faiss` " + "or `pip install faiss-cpu` (depending on Python version)." + ) + return faiss + + +class FAISS(VectorStore): + """Wrapper around FAISS vector database. + + To use, you should have the ``faiss`` python package installed. + + Example: + .. code-block:: python + + from langchain import FAISS + faiss = FAISS(embedding_function, index, docstore, index_to_docstore_id) + + """ + + def __init__( + self, + embedding_function: Callable, + index: Any, + docstore: Docstore, + index_to_docstore_id: Dict[int, str], + ): + """Initialize with necessary components.""" + self.embedding_function = embedding_function + self.index = index + self.docstore = docstore + self.index_to_docstore_id = index_to_docstore_id + + def __add( + self, + texts: Iterable[str], + embeddings: Iterable[List[float]], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + if not isinstance(self.docstore, AddableMixin): + raise ValueError( + "If trying to add texts, the underlying docstore should support " + f"adding items, which {self.docstore} does not" + ) + documents = [] + for i, text in enumerate(texts): + metadata = metadatas[i] if metadatas else {} + documents.append(Document(page_content=text, metadata=metadata)) + # Add to the index, the index_to_id mapping, and the docstore. + starting_len = len(self.index_to_docstore_id) + self.index.add(np.array(embeddings, dtype=np.float32)) + # Get list of index, id, and docs. + full_info = [ + (starting_len + i, str(uuid.uuid4()), doc) + for i, doc in enumerate(documents) + ] + # Add information to docstore and index. 
+ self.docstore.add({_id: doc for _, _id, doc in full_info}) + index_to_id = {index: _id for index, _id, _ in full_info} + self.index_to_docstore_id.update(index_to_id) + return [_id for _, _id, _ in full_info] + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ + if not isinstance(self.docstore, AddableMixin): + raise ValueError( + "If trying to add texts, the underlying docstore should support " + f"adding items, which {self.docstore} does not" + ) + # Embed and create the documents. + embeddings = [self.embedding_function(text) for text in texts] + return self.__add(texts, embeddings, metadatas, **kwargs) + + def add_embeddings( + self, + text_embeddings: Iterable[Tuple[str, List[float]]], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + text_embeddings: Iterable pairs of string and embedding to + add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ + if not isinstance(self.docstore, AddableMixin): + raise ValueError( + "If trying to add texts, the underlying docstore should support " + f"adding items, which {self.docstore} does not" + ) + # Embed and create the documents. + + texts = [te[0] for te in text_embeddings] + embeddings = [te[1] for te in text_embeddings] + return self.__add(texts, embeddings, metadatas, **kwargs) + + def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4 + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query and score for each + """ + scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k) + docs = [] + for j, i in enumerate(indices[0]): + if i == -1: + # This happens when not enough docs are returned. + continue + _id = self.index_to_docstore_id[i] + doc = self.docstore.search(_id) + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + docs.append((doc, scores[0][j])) + return docs + + def similarity_search_with_score( + self, query: str, k: int = 4 + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query and score for each + """ + embedding = self.embedding_function(query) + docs = self.similarity_search_with_score_by_vector(embedding, k) + return docs + + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the embedding. 
+ """ + docs_and_scores = self.similarity_search_with_score_by_vector(embedding, k) + return [doc for doc, _ in docs_and_scores] + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query. + """ + docs_and_scores = self.similarity_search_with_score(query, k) + return [doc for doc, _ in docs_and_scores] + + def max_marginal_relevance_search_by_vector( + self, embedding: List[float], k: int = 4, fetch_k: int = 20 + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + _, indices = self.index.search(np.array([embedding], dtype=np.float32), fetch_k) + # -1 happens when not enough docs are returned. + embeddings = [self.index.reconstruct(int(i)) for i in indices[0] if i != -1] + mmr_selected = maximal_marginal_relevance( + np.array([embedding], dtype=np.float32), embeddings, k=k + ) + selected_indices = [indices[0][i] for i in mmr_selected] + docs = [] + for i in selected_indices: + if i == -1: + # This happens when not enough docs are returned. + continue + _id = self.index_to_docstore_id[i] + doc = self.docstore.search(_id) + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + docs.append(doc) + return docs + + def max_marginal_relevance_search( + self, query: str, k: int = 4, fetch_k: int = 20 + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + embedding = self.embedding_function(query) + docs = self.max_marginal_relevance_search_by_vector(embedding, k, fetch_k) + return docs + + def merge_from(self, target: FAISS) -> None: + """Merge another FAISS object with the current one. + + Add the target FAISS to the current one. + + Args: + target: FAISS object you wish to merge into the current one + + Returns: + None. + """ + if not isinstance(self.docstore, AddableMixin): + raise ValueError("Cannot merge with this type of docstore") + # Numerical index for target docs are incremental on existing ones + starting_len = len(self.index_to_docstore_id) + + # Merge two IndexFlatL2 + self.index.merge_from(target.index) + + # Create new id for docs from target FAISS object + full_info = [] + for i in target.index_to_docstore_id: + doc = target.docstore.search(target.index_to_docstore_id[i]) + if not isinstance(doc, Document): + raise ValueError("Document should be returned") + full_info.append((starting_len + i, str(uuid.uuid4()), doc)) + + # Add information to docstore and index_to_docstore_id. 
+ self.docstore.add({_id: doc for _, _id, doc in full_info}) + index_to_id = {index: _id for index, _id, _ in full_info} + self.index_to_docstore_id.update(index_to_id) + + @classmethod + def __from( + cls, + texts: List[str], + embeddings: List[List[float]], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> FAISS: + faiss = dependable_faiss_import() + index = faiss.IndexFlatL2(len(embeddings[0])) + index.add(np.array(embeddings, dtype=np.float32)) + documents = [] + for i, text in enumerate(texts): + metadata = metadatas[i] if metadatas else {} + documents.append(Document(page_content=text, metadata=metadata)) + index_to_id = {i: str(uuid.uuid4()) for i in range(len(documents))} + docstore = InMemoryDocstore( + {index_to_id[i]: doc for i, doc in enumerate(documents)} + ) + return cls(embedding.embed_query, index, docstore, index_to_id) + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> FAISS: + """Construct FAISS wrapper from raw documents. + + This is a user friendly interface that: + 1. Embeds documents. + 2. Creates an in memory docstore + 3. Initializes the FAISS database + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain import FAISS + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + faiss = FAISS.from_texts(texts, embeddings) + """ + embeddings = embedding.embed_documents(texts) + return cls.__from(texts, embeddings, embedding, metadatas, **kwargs) + + @classmethod + def from_embeddings( + cls, + text_embeddings: List[Tuple[str, List[float]]], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> FAISS: + """Construct FAISS wrapper from raw documents. + + This is a user friendly interface that: + 1. Embeds documents. + 2. Creates an in memory docstore + 3. Initializes the FAISS database + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain import FAISS + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + faiss = FAISS.from_texts(texts, embeddings) + """ + texts = [t[0] for t in text_embeddings] + embeddings = [t[1] for t in text_embeddings] + return cls.__from(texts, embeddings, embedding, metadatas, **kwargs) + + def save_local(self, folder_path: str) -> None: + """Save FAISS index, docstore, and index_to_docstore_id to disk. + + Args: + folder_path: folder path to save index, docstore, + and index_to_docstore_id to. + """ + path = Path(folder_path) + path.mkdir(exist_ok=True, parents=True) + + # save index separately since it is not picklable + faiss = dependable_faiss_import() + faiss.write_index(self.index, str(path / "index.faiss")) + + # save docstore and index_to_docstore_id + with open(path / "index.pkl", "wb") as f: + pickle.dump((self.docstore, self.index_to_docstore_id), f) + + @classmethod + def load_local(cls, folder_path: str, embeddings: Embeddings) -> FAISS: + """Load FAISS index, docstore, and index_to_docstore_id to disk. + + Args: + folder_path: folder path to load index, docstore, + and index_to_docstore_id from. 
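The `merge_from` method above appends the target's vectors to the current `IndexFlatL2` and re-keys the target's documents into the current docstore. A small sketch, with illustrative texts, of what that looks like in practice:

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

embeddings = OpenAIEmbeddings()

# Two independently built stores.
store_a = FAISS.from_texts(["alpha", "beta"], embeddings)
store_b = FAISS.from_texts(["gamma", "delta"], embeddings)

# store_a now answers over all four texts; store_b's docs get fresh UUID keys.
store_a.merge_from(store_b)
print(len(store_a.index_to_docstore_id))  # 4
```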
+ embeddings: Embeddings to use when generating queries + """ + path = Path(folder_path) + # load index separately since it is not picklable + faiss = dependable_faiss_import() + index = faiss.read_index(str(path / "index.faiss")) + + # load docstore and index_to_docstore_id + with open(path / "index.pkl", "rb") as f: + docstore, index_to_docstore_id = pickle.load(f) + return cls(embeddings.embed_query, index, docstore, index_to_docstore_id) diff --git a/langchain/vectorstores/milvus.py b/langchain/vectorstores/milvus.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a0b208589deed96a2632d1c53b789b7e7cacbc --- /dev/null +++ b/langchain/vectorstores/milvus.py @@ -0,0 +1,429 @@ +"""Wrapper around the Milvus vector database.""" +from __future__ import annotations + +import uuid +from typing import Any, Iterable, List, Optional, Tuple + +import numpy as np + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.utils import maximal_marginal_relevance + + +class Milvus(VectorStore): + """Wrapper around the Milvus vector database.""" + + def __init__( + self, + embedding_function: Embeddings, + connection_args: dict, + collection_name: str, + text_field: str, + ): + """Initialize wrapper around the milvus vector database. + + In order to use this you need to have `pymilvus` installed and a + running Milvus instance. + + See the following documentation for how to run a Milvus instance: + https://milvus.io/docs/install_standalone-docker.md + + Args: + embedding_function (Embeddings): Function used to embed the text + connection_args (dict): Arguments for pymilvus connections.connect() + collection_name (str): The name of the collection to search. + text_field (str): The field in Milvus schema where the + original text is stored. + """ + try: + from pymilvus import Collection, DataType, connections + except ImportError: + raise ValueError( + "Could not import pymilvus python package. " + "Please install it with `pip install pymilvus`." + ) + # Connecting to Milvus instance + if not connections.has_connection("default"): + connections.connect(**connection_args) + self.embedding_func = embedding_function + self.collection_name = collection_name + + self.text_field = text_field + self.auto_id = False + self.primary_field = None + self.vector_field = None + self.fields = [] + + self.col = Collection(self.collection_name) + schema = self.col.schema + + # Grabbing the fields for the existing collection. + for x in schema.fields: + self.fields.append(x.name) + if x.auto_id: + self.fields.remove(x.name) + if x.is_primary: + self.primary_field = x.name + if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR: + self.vector_field = x.name + + # Default search params when one is not provided. + self.index_params = { + "IVF_FLAT": {"params": {"nprobe": 10}}, + "IVF_SQ8": {"params": {"nprobe": 10}}, + "IVF_PQ": {"params": {"nprobe": 10}}, + "HNSW": {"params": {"ef": 10}}, + "RHNSW_FLAT": {"params": {"ef": 10}}, + "RHNSW_SQ": {"params": {"ef": 10}}, + "RHNSW_PQ": {"params": {"ef": 10}}, + "IVF_HNSW": {"params": {"nprobe": 10, "ef": 10}}, + "ANNOY": {"params": {"search_k": 10}}, + } + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + partition_name: Optional[str] = None, + timeout: Optional[int] = None, + **kwargs: Any, + ) -> List[str]: + """Insert text data into Milvus. 
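And a round-trip sketch for the `save_local` / `load_local` pair that closes out `faiss.py`. The folder name is an assumption; per the methods above, the folder ends up holding `index.faiss` (written with `faiss.write_index`) plus a pickled `(docstore, index_to_docstore_id)` pair in `index.pkl`.

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

embeddings = OpenAIEmbeddings()
store = FAISS.from_texts(["persist me", "reload me"], embeddings)

store.save_local("faiss_demo_index")  # assumed folder name

# load_local needs the same embeddings object to embed future queries.
restored = FAISS.load_local("faiss_demo_index", embeddings)
print(restored.similarity_search("persist", k=1)[0].page_content)
```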
+ + When using add_texts() it is assumed that a collecton has already + been made and indexed. If metadata is included, it is assumed that + it is ordered correctly to match the schema provided to the Collection + and that the embedding vector is the first schema field. + + Args: + texts (Iterable[str]): The text being embedded and inserted. + metadatas (Optional[List[dict]], optional): The metadata that + corresponds to each insert. Defaults to None. + partition_name (str, optional): The partition of the collection + to insert data into. Defaults to None. + timeout: specified timeout. + + Returns: + List[str]: The resulting keys for each inserted element. + """ + insert_dict: Any = {self.text_field: list(texts)} + try: + insert_dict[self.vector_field] = self.embedding_func.embed_documents( + list(texts) + ) + except NotImplementedError: + insert_dict[self.vector_field] = [ + self.embedding_func.embed_query(x) for x in texts + ] + # Collect the metadata into the insert dict. + if len(self.fields) > 2 and metadatas is not None: + for d in metadatas: + for key, value in d.items(): + if key in self.fields: + insert_dict.setdefault(key, []).append(value) + # Convert dict to list of lists for insertion + insert_list = [insert_dict[x] for x in self.fields] + # Insert into the collection. + res = self.col.insert( + insert_list, partition_name=partition_name, timeout=timeout + ) + # Flush to make sure newly inserted is immediately searchable. + self.col.flush() + return res.primary_keys + + def _worker_search( + self, + query: str, + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + partition_names: Optional[List[str]] = None, + round_decimal: int = -1, + timeout: Optional[int] = None, + **kwargs: Any, + ) -> Tuple[List[float], List[Tuple[Document, Any, Any]]]: + # Load the collection into memory for searching. + self.col.load() + # Decide to use default params if not passed in. + if param is None: + index_type = self.col.indexes[0].params["index_type"] + param = self.index_params[index_type] + # Embed the query text. + data = [self.embedding_func.embed_query(query)] + # Determine result metadata fields. + output_fields = self.fields[:] + output_fields.remove(self.vector_field) + # Perform the search. + res = self.col.search( + data, + self.vector_field, + param, + k, + expr=expr, + output_fields=output_fields, + partition_names=partition_names, + round_decimal=round_decimal, + timeout=timeout, + **kwargs, + ) + # Organize results. + ret = [] + for result in res[0]: + meta = {x: result.entity.get(x) for x in output_fields} + ret.append( + ( + Document(page_content=meta.pop(self.text_field), metadata=meta), + result.distance, + result.id, + ) + ) + + return data[0], ret + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + partition_names: Optional[List[str]] = None, + round_decimal: int = -1, + timeout: Optional[int] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Perform a search on a query string and return results. + + Args: + query (str): The text being searched. + k (int, optional): The amount of results ot return. Defaults to 4. + param (dict, optional): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + partition_names (List[str], optional): Partitions to search through. + Defaults to None. + round_decimal (int, optional): Round the resulting distance. Defaults + to -1. 
+ timeout (int, optional): Amount to wait before timeout error. Defaults + to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[float], List[Tuple[Document, any, any]]: search_embedding, + (Document, distance, primary_field) results. + """ + _, result = self._worker_search( + query, k, param, expr, partition_names, round_decimal, timeout, **kwargs + ) + return [(x, y) for x, y, _ in result] + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + param: Optional[dict] = None, + expr: Optional[str] = None, + partition_names: Optional[List[str]] = None, + round_decimal: int = -1, + timeout: Optional[int] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a search and return results that are reordered by MMR. + + Args: + query (str): The text being searched. + k (int, optional): How many results to give. Defaults to 4. + fetch_k (int, optional): Total results to select k from. + Defaults to 20. + param (dict, optional): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + partition_names (List[str], optional): What partitions to search. + Defaults to None. + round_decimal (int, optional): Round the resulting distance. Defaults + to -1. + timeout (int, optional): Amount to wait before timeout error. Defaults + to None. + + Returns: + List[Document]: Document results for search. + """ + data, res = self._worker_search( + query, + fetch_k, + param, + expr, + partition_names, + round_decimal, + timeout, + **kwargs, + ) + # Extract result IDs. + ids = [x for _, _, x in res] + # Get the raw vectors from Milvus. + vectors = self.col.query( + expr=f"{self.primary_field} in {ids}", + output_fields=[self.primary_field, self.vector_field], + ) + # Reorganize the results from query to match result order. + vectors = {x[self.primary_field]: x[self.vector_field] for x in vectors} + search_embedding = data + ordered_result_embeddings = [vectors[x] for x in ids] + # Get the new order of results. + new_ordering = maximal_marginal_relevance( + np.array(search_embedding), ordered_result_embeddings, k=k + ) + # Reorder the values and return. + ret = [] + for x in new_ordering: + if x == -1: + break + else: + ret.append(res[x][0]) + return ret + + def similarity_search( + self, + query: str, + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + partition_names: Optional[List[str]] = None, + round_decimal: int = -1, + timeout: Optional[int] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a similarity search against the query string. + + Args: + query (str): The text to search. + k (int, optional): How many results to return. Defaults to 4. + param (dict, optional): The search params for the index type. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + partition_names (List[str], optional): What partitions to search. + Defaults to None. + round_decimal (int, optional): What decimal point to round to. + Defaults to -1. + timeout (int, optional): How long to wait before timeout error. + Defaults to None. + + Returns: + List[Document]: Document results for search. 
+ """ + _, docs_and_scores = self._worker_search( + query, k, param, expr, partition_names, round_decimal, timeout, **kwargs + ) + return [doc for doc, _, _ in docs_and_scores] + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> Milvus: + """Create a Milvus collection, indexes it with HNSW, and insert data. + + Args: + texts (List[str]): Text to insert. + embedding (Embeddings): Embedding function to use. + metadatas (Optional[List[dict]], optional): Dict metatadata. + Defaults to None. + + Returns: + VectorStore: The Milvus vector store. + """ + try: + from pymilvus import ( + Collection, + CollectionSchema, + DataType, + FieldSchema, + connections, + ) + from pymilvus.orm.types import infer_dtype_bydata + except ImportError: + raise ValueError( + "Could not import pymilvus python package. " + "Please install it with `pip install pymilvus`." + ) + # Connect to Milvus instance + if not connections.has_connection("default"): + connections.connect(**kwargs.get("connection_args", {"port": 19530})) + # Determine embedding dim + embeddings = embedding.embed_query(texts[0]) + dim = len(embeddings) + # Generate unique names + primary_field = "c" + str(uuid.uuid4().hex) + vector_field = "c" + str(uuid.uuid4().hex) + text_field = "c" + str(uuid.uuid4().hex) + collection_name = "c" + str(uuid.uuid4().hex) + fields = [] + # Determine metadata schema + if metadatas: + # Check if all metadata keys line up + key = metadatas[0].keys() + for x in metadatas: + if key != x.keys(): + raise ValueError( + "Mismatched metadata. " + "Make sure all metadata has the same keys and datatype." + ) + # Create FieldSchema for each entry in singular metadata. + for key, value in metadatas[0].items(): + # Infer the corresponding datatype of the metadata + dtype = infer_dtype_bydata(value) + if dtype == DataType.UNKNOWN: + raise ValueError(f"Unrecognized datatype for {key}.") + elif dtype == DataType.VARCHAR: + # Find out max length text based metadata + max_length = 0 + for subvalues in metadatas: + max_length = max(max_length, len(subvalues[key])) + fields.append( + FieldSchema(key, DataType.VARCHAR, max_length=max_length + 1) + ) + else: + fields.append(FieldSchema(key, dtype)) + + # Find out max length of texts + max_length = 0 + for y in texts: + max_length = max(max_length, len(y)) + # Create the text field + fields.append( + FieldSchema(text_field, DataType.VARCHAR, max_length=max_length + 1) + ) + # Create the primary key field + fields.append( + FieldSchema(primary_field, DataType.INT64, is_primary=True, auto_id=True) + ) + # Create the vector field + fields.append(FieldSchema(vector_field, DataType.FLOAT_VECTOR, dim=dim)) + # Create the schema for the collection + schema = CollectionSchema(fields) + # Create the collection + collection = Collection(collection_name, schema) + # Index parameters for the collection + index = { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 8, "efConstruction": 64}, + } + # Create the index + collection.create_index(vector_field, index) + # Create the VectorStore + milvus = cls( + embedding, + kwargs.get("connection_args", {"port": 19530}), + collection_name, + text_field, + ) + # Add the texts. 
+ milvus.add_texts(texts, metadatas) + + return milvus diff --git a/langchain/vectorstores/opensearch_vector_search.py b/langchain/vectorstores/opensearch_vector_search.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1d7357408cd94d58deda864c75e7be7b948b03 --- /dev/null +++ b/langchain/vectorstores/opensearch_vector_search.py @@ -0,0 +1,387 @@ +"""Wrapper around OpenSearch vector database.""" +from __future__ import annotations + +import uuid +from typing import Any, Dict, Iterable, List, Optional + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env +from langchain.vectorstores.base import VectorStore + +IMPORT_OPENSEARCH_PY_ERROR = ( + "Could not import OpenSearch. Please install it with `pip install opensearch-py`." +) +SCRIPT_SCORING_SEARCH = "script_scoring" +PAINLESS_SCRIPTING_SEARCH = "painless_scripting" +MATCH_ALL_QUERY = {"match_all": {}} # type: Dict + + +def _import_opensearch() -> Any: + """Import OpenSearch if available, otherwise raise error.""" + try: + from opensearchpy import OpenSearch + except ImportError: + raise ValueError(IMPORT_OPENSEARCH_PY_ERROR) + return OpenSearch + + +def _import_bulk() -> Any: + """Import bulk if available, otherwise raise error.""" + try: + from opensearchpy.helpers import bulk + except ImportError: + raise ValueError(IMPORT_OPENSEARCH_PY_ERROR) + return bulk + + +def _get_opensearch_client(opensearch_url: str) -> Any: + """Get OpenSearch client from the opensearch_url, otherwise raise error.""" + try: + opensearch = _import_opensearch() + client = opensearch(opensearch_url) + except ValueError as e: + raise ValueError( + f"OpenSearch client string provided is not in proper format. " + f"Got error: {e} " + ) + return client + + +def _validate_embeddings_and_bulk_size(embeddings_length: int, bulk_size: int) -> None: + """Validate Embeddings Length and Bulk Size.""" + if embeddings_length == 0: + raise RuntimeError("Embeddings size is zero") + if bulk_size < embeddings_length: + raise RuntimeError( + f"The embeddings count, {embeddings_length} is more than the " + f"[bulk_size], {bulk_size}. Increase the value of [bulk_size]." 
+ ) + + +def _bulk_ingest_embeddings( + client: Any, + index_name: str, + embeddings: List[List[float]], + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, +) -> List[str]: + """Bulk Ingest Embeddings into given index.""" + bulk = _import_bulk() + requests = [] + ids = [] + for i, text in enumerate(texts): + metadata = metadatas[i] if metadatas else {} + _id = str(uuid.uuid4()) + request = { + "_op_type": "index", + "_index": index_name, + "vector_field": embeddings[i], + "text": text, + "metadata": metadata, + "_id": _id, + } + requests.append(request) + ids.append(_id) + bulk(client, requests) + client.indices.refresh(index=index_name) + return ids + + +def _default_scripting_text_mapping(dim: int) -> Dict: + """For Painless Scripting or Script Scoring,the default mapping to create index.""" + return { + "mappings": { + "properties": { + "vector_field": {"type": "knn_vector", "dimension": dim}, + } + } + } + + +def _default_text_mapping( + dim: int, + engine: str = "nmslib", + space_type: str = "l2", + ef_search: int = 512, + ef_construction: int = 512, + m: int = 16, +) -> Dict: + """For Approximate k-NN Search, this is the default mapping to create index.""" + return { + "settings": {"index": {"knn": True, "knn.algo_param.ef_search": ef_search}}, + "mappings": { + "properties": { + "vector_field": { + "type": "knn_vector", + "dimension": dim, + "method": { + "name": "hnsw", + "space_type": space_type, + "engine": engine, + "parameters": {"ef_construction": ef_construction, "m": m}, + }, + } + } + }, + } + + +def _default_approximate_search_query( + query_vector: List[float], size: int = 4, k: int = 4 +) -> Dict: + """For Approximate k-NN Search, this is the default query.""" + return { + "size": size, + "query": {"knn": {"vector_field": {"vector": query_vector, "k": k}}}, + } + + +def _default_script_query( + query_vector: List[float], + space_type: str = "l2", + pre_filter: Dict = MATCH_ALL_QUERY, +) -> Dict: + """For Script Scoring Search, this is the default query.""" + return { + "query": { + "script_score": { + "query": pre_filter, + "script": { + "source": "knn_score", + "lang": "knn", + "params": { + "field": "vector_field", + "query_value": query_vector, + "space_type": space_type, + }, + }, + } + } + } + + +def __get_painless_scripting_source(space_type: str, query_vector: List[float]) -> str: + """For Painless Scripting, it returns the script source based on space type.""" + source_value = ( + "(1.0 + " + space_type + "(" + str(query_vector) + ", doc['vector_field']))" + ) + if space_type == "cosineSimilarity": + return source_value + else: + return "1/" + source_value + + +def _default_painless_scripting_query( + query_vector: List[float], + space_type: str = "l2Squared", + pre_filter: Dict = MATCH_ALL_QUERY, +) -> Dict: + """For Painless Scripting Search, this is the default query.""" + source = __get_painless_scripting_source(space_type, query_vector) + return { + "query": { + "script_score": { + "query": pre_filter, + "script": { + "source": source, + "params": { + "field": "vector_field", + "query_value": query_vector, + }, + }, + } + } + } + + +def _get_kwargs_value(kwargs: Any, key: str, default_value: Any) -> Any: + """Get the value of the key if present. Else get the default_value.""" + if key in kwargs: + return kwargs.get(key) + return default_value + + +class OpenSearchVectorSearch(VectorStore): + """Wrapper around OpenSearch as a vector database. + + Example: + .. 
code-block:: python + + from langchain import OpenSearchVectorSearch + opensearch_vector_search = OpenSearchVectorSearch( + "http://localhost:9200", + "embeddings", + embedding_function + ) + + """ + + def __init__( + self, opensearch_url: str, index_name: str, embedding_function: Embeddings + ): + """Initialize with necessary components.""" + self.embedding_function = embedding_function + self.index_name = index_name + self.client = _get_opensearch_client(opensearch_url) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + bulk_size: int = 500, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + bulk_size: Bulk API request count; Default: 500 + + Returns: + List of ids from adding the texts into the vectorstore. + """ + embeddings = [ + self.embedding_function.embed_documents([text])[0] for text in texts + ] + _validate_embeddings_and_bulk_size(len(embeddings), bulk_size) + return _bulk_ingest_embeddings( + self.client, self.index_name, embeddings, texts, metadatas + ) + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query. + + By default supports Approximate Search. + Also supports Script Scoring and Painless Scripting. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + + Returns: + List of Documents most similar to the query. + + Optional Args for Approximate Search: + search_type: "approximate_search"; default: "approximate_search" + size: number of results the query actually returns; default: 4 + + Optional Args for Script Scoring Search: + search_type: "script_scoring"; default: "approximate_search" + + space_type: "l2", "l1", "linf", "cosinesimil", "innerproduct", + "hammingbit"; default: "l2" + + pre_filter: script_score query to pre-filter documents before identifying + nearest neighbors; default: {"match_all": {}} + + Optional Args for Painless Scripting Search: + search_type: "painless_scripting"; default: "approximate_search" + space_type: "l2Squared", "l1Norm", "cosineSimilarity"; default: "l2Squared" + + pre_filter: script_score query to pre-filter documents before identifying + nearest neighbors; default: {"match_all": {}} + """ + embedding = self.embedding_function.embed_query(query) + search_type = _get_kwargs_value(kwargs, "search_type", "approximate_search") + if search_type == "approximate_search": + size = _get_kwargs_value(kwargs, "size", 4) + search_query = _default_approximate_search_query(embedding, size, k) + elif search_type == SCRIPT_SCORING_SEARCH: + space_type = _get_kwargs_value(kwargs, "space_type", "l2") + pre_filter = _get_kwargs_value(kwargs, "pre_filter", MATCH_ALL_QUERY) + search_query = _default_script_query(embedding, space_type, pre_filter) + elif search_type == PAINLESS_SCRIPTING_SEARCH: + space_type = _get_kwargs_value(kwargs, "space_type", "l2Squared") + pre_filter = _get_kwargs_value(kwargs, "pre_filter", MATCH_ALL_QUERY) + search_query = _default_painless_scripting_query( + embedding, space_type, pre_filter + ) + else: + raise ValueError("Invalid `search_type` provided as an argument") + + response = self.client.search(index=self.index_name, body=search_query) + hits = [hit["_source"] for hit in response["hits"]["hits"][:k]] + documents = [ + Document(page_content=hit["text"], 
metadata=hit["metadata"]) for hit in hits + ] + return documents + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + bulk_size: int = 500, + **kwargs: Any, + ) -> OpenSearchVectorSearch: + """Construct OpenSearchVectorSearch wrapper from raw documents. + + Example: + .. code-block:: python + + from langchain import OpenSearchVectorSearch + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + opensearch_vector_search = OpenSearchVectorSearch.from_texts( + texts, + embeddings, + opensearch_url="http://localhost:9200" + ) + + OpenSearch by default supports Approximate Search powered by nmslib, faiss + and lucene engines recommended for large datasets. Also supports brute force + search through Script Scoring and Painless Scripting. + + Optional Keyword Args for Approximate Search: + engine: "nmslib", "faiss", "hnsw"; default: "nmslib" + + space_type: "l2", "l1", "cosinesimil", "linf", "innerproduct"; default: "l2" + + ef_search: Size of the dynamic list used during k-NN searches. Higher values + lead to more accurate but slower searches; default: 512 + + ef_construction: Size of the dynamic list used during k-NN graph creation. + Higher values lead to more accurate graph but slower indexing speed; + default: 512 + + m: Number of bidirectional links created for each new element. Large impact + on memory consumption. Between 2 and 100; default: 16 + + Keyword Args for Script Scoring or Painless Scripting: + is_appx_search: False + + """ + opensearch_url = get_from_dict_or_env( + kwargs, "opensearch_url", "OPENSEARCH_URL" + ) + client = _get_opensearch_client(opensearch_url) + embeddings = embedding.embed_documents(texts) + _validate_embeddings_and_bulk_size(len(embeddings), bulk_size) + dim = len(embeddings[0]) + # Get the index name from either from kwargs or ENV Variable + # before falling back to random generation + index_name = get_from_dict_or_env( + kwargs, "index_name", "OPENSEARCH_INDEX_NAME", default=uuid.uuid4().hex + ) + is_appx_search = _get_kwargs_value(kwargs, "is_appx_search", True) + if is_appx_search: + engine = _get_kwargs_value(kwargs, "engine", "nmslib") + space_type = _get_kwargs_value(kwargs, "space_type", "l2") + ef_search = _get_kwargs_value(kwargs, "ef_search", 512) + ef_construction = _get_kwargs_value(kwargs, "ef_construction", 512) + m = _get_kwargs_value(kwargs, "m", 16) + + mapping = _default_text_mapping( + dim, engine, space_type, ef_search, ef_construction, m + ) + else: + mapping = _default_scripting_text_mapping(dim) + + client.indices.create(index=index_name, body=mapping) + _bulk_ingest_embeddings(client, index_name, embeddings, texts, metadatas) + return cls(opensearch_url, index_name, embedding) diff --git a/langchain/vectorstores/pgvector.py b/langchain/vectorstores/pgvector.py new file mode 100644 index 0000000000000000000000000000000000000000..941a9378cbbef4e261db1f4be8ac98ebfdae99b1 --- /dev/null +++ b/langchain/vectorstores/pgvector.py @@ -0,0 +1,442 @@ +import enum +import logging +import uuid +from typing import Any, Dict, Iterable, List, Optional, Tuple + +import sqlalchemy +from pgvector.sqlalchemy import Vector +from sqlalchemy.dialects.postgresql import JSON, UUID +from sqlalchemy.orm import Mapped, Session, declarative_base, relationship + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env +from langchain.vectorstores.base import 
VectorStore + +Base = declarative_base() # type: Any + + +ADA_TOKEN_COUNT = 1536 +_LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" + + +class BaseModel(Base): + __abstract__ = True + uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + + +class CollectionStore(BaseModel): + __tablename__ = "langchain_pg_collection" + + name = sqlalchemy.Column(sqlalchemy.String) + cmetadata = sqlalchemy.Column(JSON) + + embeddings = relationship( + "EmbeddingStore", + back_populates="collection", + passive_deletes=True, + ) + + @classmethod + def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]: + return session.query(cls).filter(cls.name == name).first() + + @classmethod + def get_or_create( + cls, + session: Session, + name: str, + cmetadata: Optional[dict] = None, + ) -> Tuple["CollectionStore", bool]: + """ + Get or create a collection. + Returns [Collection, bool] where the bool is True if the collection was created. + """ + created = False + collection = cls.get_by_name(session, name) + if collection: + return collection, created + + collection = cls(name=name, cmetadata=cmetadata) + session.add(collection) + session.commit() + created = True + return collection, created + + +class EmbeddingStore(BaseModel): + __tablename__ = "langchain_pg_embedding" + + collection_id: Mapped[UUID] = sqlalchemy.Column( + UUID(as_uuid=True), + sqlalchemy.ForeignKey( + f"{CollectionStore.__tablename__}.uuid", + ondelete="CASCADE", + ), + ) + collection = relationship(CollectionStore, back_populates="embeddings") + + embedding: Vector = sqlalchemy.Column(Vector(ADA_TOKEN_COUNT)) + document = sqlalchemy.Column(sqlalchemy.String, nullable=True) + cmetadata = sqlalchemy.Column(JSON, nullable=True) + + # custom_id : any user defined id + custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True) + + +class QueryResult: + EmbeddingStore: EmbeddingStore + distance: float + + +class DistanceStrategy(str, enum.Enum): + EUCLIDEAN = EmbeddingStore.embedding.l2_distance + COSINE = EmbeddingStore.embedding.cosine_distance + MAX_INNER_PRODUCT = EmbeddingStore.embedding.max_inner_product + + +DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN + + +class PGVector(VectorStore): + """ + VectorStore implementation using Postgres and pgvector. + - `connection_string` is a postgres connection string. + - `embedding_function` any embedding function implementing + `langchain.embeddings.base.Embeddings` interface. + - `collection_name` is the name of the collection to use. (default: langchain) + - NOTE: This is not the name of the table, but the name of the collection. + The tables will be created when initializing the store (if not exists) + So, make sure the user has the right permissions to create tables. + - `distance_strategy` is the distance strategy to use. (default: EUCLIDEAN) + - `EUCLIDEAN` is the euclidean distance. + - `COSINE` is the cosine distance. + - `pre_delete_collection` if True, will delete the collection if it exists. + (default: False) + - Useful for testing. 
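+
+    Example:
+        A minimal, illustrative setup. The connection string below is a
+        placeholder (see `connection_string_from_db_params` for the expected
+        format), and OpenAIEmbeddings is just one possible `Embeddings`
+        implementation.
+
+        .. code-block:: python
+
+            from langchain.embeddings.openai import OpenAIEmbeddings
+            from langchain.vectorstores.pgvector import PGVector
+
+            CONNECTION_STRING = "postgresql+psycopg2://user:password@localhost:5432/postgres"
+            store = PGVector(
+                connection_string=CONNECTION_STRING,
+                embedding_function=OpenAIEmbeddings(),
+                collection_name="my_documents",
+            )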
+ """ + + def __init__( + self, + connection_string: str, + embedding_function: Embeddings, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + collection_metadata: Optional[dict] = None, + distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, + pre_delete_collection: bool = False, + logger: Optional[logging.Logger] = None, + ) -> None: + self.connection_string = connection_string + self.embedding_function = embedding_function + self.collection_name = collection_name + self.collection_metadata = collection_metadata + self.distance_strategy = distance_strategy + self.pre_delete_collection = pre_delete_collection + self.logger = logger or logging.getLogger(__name__) + self.__post_init__() + + def __post_init__( + self, + ) -> None: + """ + Initialize the store. + """ + self._conn = self.connect() + # self.create_vector_extension() + self.create_tables_if_not_exists() + self.create_collection() + + def connect(self) -> sqlalchemy.engine.Connection: + engine = sqlalchemy.create_engine(self.connection_string) + conn = engine.connect() + return conn + + def create_vector_extension(self) -> None: + try: + with Session(self._conn) as session: + statement = sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector") + session.execute(statement) + session.commit() + except Exception as e: + self.logger.exception(e) + + def create_tables_if_not_exists(self) -> None: + Base.metadata.create_all(self._conn) + + def drop_tables(self) -> None: + Base.metadata.drop_all(self._conn) + + def create_collection(self) -> None: + if self.pre_delete_collection: + self.delete_collection() + with Session(self._conn) as session: + CollectionStore.get_or_create( + session, self.collection_name, cmetadata=self.collection_metadata + ) + + def delete_collection(self) -> None: + self.logger.debug("Trying to delete collection") + with Session(self._conn) as session: + collection = self.get_collection(session) + if not collection: + self.logger.error("Collection not found") + return + session.delete(collection) + session.commit() + + def get_collection(self, session: Session) -> Optional["CollectionStore"]: + return CollectionStore.get_by_name(session, self.collection_name) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + if ids is None: + ids = [str(uuid.uuid1()) for _ in texts] + + embeddings = self.embedding_function.embed_documents(list(texts)) + + if not metadatas: + metadatas = [{} for _ in texts] + + with Session(self._conn) as session: + collection = self.get_collection(session) + if not collection: + raise ValueError("Collection not found") + for text, metadata, embedding, id in zip(texts, metadatas, embeddings, ids): + embedding_store = EmbeddingStore( + embedding=embedding, + document=text, + cmetadata=metadata, + custom_id=id, + ) + collection.embeddings.append(embedding_store) + session.add(embedding_store) + session.commit() + + return ids + + def similarity_search( + self, + query: str, + k: int = 4, + filter: Optional[dict] = None, + **kwargs: Any, + ) -> List[Document]: + """Run similarity search with PGVector with distance. 
+ + Args: + query (str): Query text to search for. + k (int): Number of results to return. Defaults to 4. + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. + + Returns: + List of Documents most similar to the query. + """ + embedding = self.embedding_function.embed_query(text=query) + return self.similarity_search_by_vector( + embedding=embedding, + k=k, + filter=filter, + ) + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[dict] = None, + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. + + Returns: + List of Documents most similar to the query and score for each + """ + embedding = self.embedding_function.embed_query(query) + docs = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, filter=filter + ) + return docs + + def similarity_search_with_score_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[dict] = None, + ) -> List[Tuple[Document, float]]: + with Session(self._conn) as session: + collection = self.get_collection(session) + if not collection: + raise ValueError("Collection not found") + + filter_by = EmbeddingStore.collection_id == collection.uuid + + if filter is not None: + filter_clauses = [] + for key, value in filter.items(): + filter_by_metadata = EmbeddingStore.cmetadata[key].astext == str(value) + filter_clauses.append(filter_by_metadata) + + filter_by = sqlalchemy.and_(filter_by, *filter_clauses) + + results: List[QueryResult] = ( + session.query( + EmbeddingStore, + self.distance_strategy(embedding).label("distance"), # type: ignore + ) + .filter(filter_by) + .order_by(sqlalchemy.asc("distance")) + .join( + CollectionStore, + EmbeddingStore.collection_id == CollectionStore.uuid, + ) + .limit(k) + .all() + ) + docs = [ + ( + Document( + page_content=result.EmbeddingStore.document, + metadata=result.EmbeddingStore.cmetadata, + ), + result.distance if self.embedding_function is not None else None, + ) + for result in results + ] + return docs + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[dict] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. + + Returns: + List of Documents most similar to the query vector. + """ + docs_and_scores = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, filter=filter + ) + return [doc for doc, _ in docs_and_scores] + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + distance_strategy: DistanceStrategy = DistanceStrategy.COSINE, + ids: Optional[List[str]] = None, + pre_delete_collection: bool = False, + **kwargs: Any, + ) -> "PGVector": + """ + Return VectorStore initialized from texts and embeddings. + Postgres connection string is required + "Either pass it as a parameter + or set the PGVECTOR_CONNECTION_STRING environment variable. 
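+
+        Example:
+            An illustrative call. The connection string is a placeholder and is
+            passed through ``**kwargs``; alternatively, set the
+            PGVECTOR_CONNECTION_STRING environment variable.
+
+            .. code-block:: python
+
+                store = PGVector.from_texts(
+                    texts=["foo", "bar"],
+                    embedding=OpenAIEmbeddings(),
+                    connection_string="postgresql+psycopg2://user:password@localhost:5432/postgres",
+                )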
+ """ + + connection_string = cls.get_connection_string(kwargs) + + store = cls( + connection_string=connection_string, + collection_name=collection_name, + embedding_function=embedding, + distance_strategy=distance_strategy, + pre_delete_collection=pre_delete_collection, + ) + + store.add_texts(texts=texts, metadatas=metadatas, ids=ids, **kwargs) + return store + + @classmethod + def get_connection_string(cls, kwargs: Dict[str, Any]) -> str: + connection_string: str = get_from_dict_or_env( + data=kwargs, + key="connection_string", + env_key="PGVECTOR_CONNECTION_STRING", + ) + + if not connection_string: + raise ValueError( + "Postgres connection string is required" + "Either pass it as a parameter" + "or set the PGVECTOR_CONNECTION_STRING environment variable." + ) + + return connection_string + + @classmethod + def from_documents( + cls, + documents: List[Document], + embedding: Embeddings, + collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, + distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, + ids: Optional[List[str]] = None, + pre_delete_collection: bool = False, + **kwargs: Any, + ) -> "PGVector": + """ + Return VectorStore initialized from documents and embeddings. + Postgres connection string is required + "Either pass it as a parameter + or set the PGVECTOR_CONNECTION_STRING environment variable. + """ + + texts = [d.page_content for d in documents] + metadatas = [d.metadata for d in documents] + connection_string = cls.get_connection_string(kwargs) + + kwargs["connection_string"] = connection_string + + return cls.from_texts( + texts=texts, + pre_delete_collection=pre_delete_collection, + embedding=embedding, + distance_strategy=distance_strategy, + metadatas=metadatas, + ids=ids, + collection_name=collection_name, + **kwargs, + ) + + @classmethod + def connection_string_from_db_params( + cls, + driver: str, + host: str, + port: int, + database: str, + user: str, + password: str, + ) -> str: + """Return connection string from database parameters.""" + return f"postgresql+{driver}://{user}:{password}@{host}:{port}/{database}" diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py new file mode 100644 index 0000000000000000000000000000000000000000..7983ba456d880f8a7894785955f27c57d78e53bb --- /dev/null +++ b/langchain/vectorstores/pinecone.py @@ -0,0 +1,254 @@ +"""Wrapper around Pinecone vector database.""" +from __future__ import annotations + +import uuid +from typing import Any, Callable, Iterable, List, Optional, Tuple + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore + + +class Pinecone(VectorStore): + """Wrapper around Pinecone vector database. + + To use, you should have the ``pinecone-client`` python package installed. + + Example: + .. code-block:: python + + from langchain.vectorstores import Pinecone + from langchain.embeddings.openai import OpenAIEmbeddings + import pinecone + + pinecone.init(api_key="***", environment="us-west1-gcp") + index = pinecone.Index("langchain-demo") + embeddings = OpenAIEmbeddings() + vectorstore = Pinecone(index, embeddings.embed_query, "text") + """ + + def __init__( + self, + index: Any, + embedding_function: Callable, + text_key: str, + namespace: Optional[str] = None, + ): + """Initialize with Pinecone client.""" + try: + import pinecone + except ImportError: + raise ValueError( + "Could not import pinecone python package. " + "Please install it with `pip install pinecone-client`." 
+ ) + if not isinstance(index, pinecone.index.Index): + raise ValueError( + f"client should be an instance of pinecone.index.Index, " + f"got {type(index)}" + ) + self._index = index + self._embedding_function = embedding_function + self._text_key = text_key + self._namespace = namespace + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + namespace: Optional[str] = None, + batch_size: int = 32, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + ids: Optional list of ids to associate with the texts. + namespace: Optional pinecone namespace to add the texts to. + + Returns: + List of ids from adding the texts into the vectorstore. + + """ + if namespace is None: + namespace = self._namespace + # Embed and create the documents + docs = [] + ids = ids or [str(uuid.uuid4()) for _ in texts] + for i, text in enumerate(texts): + embedding = self._embedding_function(text) + metadata = metadatas[i] if metadatas else {} + metadata[self._text_key] = text + docs.append((ids[i], embedding, metadata)) + # upsert to Pinecone + self._index.upsert(vectors=docs, namespace=namespace, batch_size=batch_size) + return ids + + def similarity_search_with_score( + self, + query: str, + k: int = 5, + filter: Optional[dict] = None, + namespace: Optional[str] = None, + ) -> List[Tuple[Document, float]]: + """Return pinecone documents most similar to query, along with scores. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Dictionary of argument(s) to filter on metadata + namespace: Namespace to search in. Default will search in '' namespace. + + Returns: + List of Documents most similar to the query and score for each + """ + if namespace is None: + namespace = self._namespace + query_obj = self._embedding_function(query) + docs = [] + results = self._index.query( + [query_obj], + top_k=k, + include_metadata=True, + namespace=namespace, + filter=filter, + ) + for res in results["matches"]: + metadata = res["metadata"] + text = metadata.pop(self._text_key) + docs.append((Document(page_content=text, metadata=metadata), res["score"])) + return docs + + def similarity_search( + self, + query: str, + k: int = 5, + filter: Optional[dict] = None, + namespace: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Return pinecone documents most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Dictionary of argument(s) to filter on metadata + namespace: Namespace to search in. Default will search in '' namespace. 
+ + Returns: + List of Documents most similar to the query and score for each + """ + if namespace is None: + namespace = self._namespace + query_obj = self._embedding_function(query) + docs = [] + results = self._index.query( + [query_obj], + top_k=k, + include_metadata=True, + namespace=namespace, + filter=filter, + ) + for res in results["matches"]: + metadata = res["metadata"] + text = metadata.pop(self._text_key) + docs.append(Document(page_content=text, metadata=metadata)) + return docs + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + batch_size: int = 32, + text_key: str = "text", + index_name: Optional[str] = None, + namespace: Optional[str] = None, + **kwargs: Any, + ) -> Pinecone: + """Construct Pinecone wrapper from raw documents. + + This is a user friendly interface that: + 1. Embeds documents. + 2. Adds the documents to a provided Pinecone index + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain import Pinecone + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + pinecone = Pinecone.from_texts( + texts, + embeddings, + index_name="langchain-demo" + ) + """ + try: + import pinecone + except ImportError: + raise ValueError( + "Could not import pinecone python package. " + "Please install it with `pip install pinecone-client`." + ) + _index_name = index_name or str(uuid.uuid4()) + indexes = pinecone.list_indexes() # checks if provided index exists + if _index_name in indexes: + index = pinecone.Index(_index_name) + else: + index = None + for i in range(0, len(texts), batch_size): + # set end position of batch + i_end = min(i + batch_size, len(texts)) + # get batch of texts and ids + lines_batch = texts[i:i_end] + # create ids if not provided + if ids: + ids_batch = ids[i:i_end] + else: + ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)] + # create embeddings + embeds = embedding.embed_documents(lines_batch) + # prep metadata and upsert batch + if metadatas: + metadata = metadatas[i:i_end] + else: + metadata = [{} for _ in range(i, i_end)] + for j, line in enumerate(lines_batch): + metadata[j][text_key] = line + to_upsert = zip(ids_batch, embeds, metadata) + # Create index if it does not exist + if index is None: + pinecone.create_index(_index_name, dimension=len(embeds[0])) + index = pinecone.Index(_index_name) + # upsert to Pinecone + index.upsert(vectors=list(to_upsert), namespace=namespace) + return cls(index, embedding.embed_query, text_key, namespace) + + @classmethod + def from_existing_index( + cls, + index_name: str, + embedding: Embeddings, + text_key: str = "text", + namespace: Optional[str] = None, + ) -> Pinecone: + """Load pinecone vectorstore from index name.""" + try: + import pinecone + except ImportError: + raise ValueError( + "Could not import pinecone python package. " + "Please install it with `pip install pinecone-client`." 
+ ) + + return cls( + pinecone.Index(index_name), embedding.embed_query, text_key, namespace + ) diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce535fcd4fe911cbbb9f88d55cd3cc4628b1264 --- /dev/null +++ b/langchain/vectorstores/qdrant.py @@ -0,0 +1,407 @@ +"""Wrapper around Qdrant vector database.""" +import uuid +from operator import itemgetter +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores import VectorStore +from langchain.vectorstores.utils import maximal_marginal_relevance + +MetadataFilter = Dict[str, Union[str, int, bool]] + + +class Qdrant(VectorStore): + """Wrapper around Qdrant vector database. + + To use you should have the ``qdrant-client`` package installed. + + Example: + .. code-block:: python + + from langchain import Qdrant + + client = QdrantClient() + collection_name = "MyCollection" + qdrant = Qdrant(client, collection_name, embedding_function) + """ + + CONTENT_KEY = "page_content" + METADATA_KEY = "metadata" + + def __init__( + self, + client: Any, + collection_name: str, + embedding_function: Callable, + content_payload_key: str = CONTENT_KEY, + metadata_payload_key: str = METADATA_KEY, + ): + """Initialize with necessary components.""" + try: + import qdrant_client + except ImportError: + raise ValueError( + "Could not import qdrant-client python package. " + "Please install it with `pip install qdrant-client`." + ) + + if not isinstance(client, qdrant_client.QdrantClient): + raise ValueError( + f"client should be an instance of qdrant_client.QdrantClient, " + f"got {type(client)}" + ) + + self.client: qdrant_client.QdrantClient = client + self.collection_name = collection_name + self.embedding_function = embedding_function + self.content_payload_key = content_payload_key or self.CONTENT_KEY + self.metadata_payload_key = metadata_payload_key or self.METADATA_KEY + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ + from qdrant_client.http import models as rest + + ids = [uuid.uuid4().hex for _ in texts] + self.client.upsert( + collection_name=self.collection_name, + points=rest.Batch( + ids=ids, + vectors=[self.embedding_function(text) for text in texts], + payloads=self._build_payloads( + texts, + metadatas, + self.content_payload_key, + self.metadata_payload_key, + ), + ), + ) + + return ids + + def similarity_search( + self, + query: str, + k: int = 4, + filter: Optional[MetadataFilter] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter by metadata. Defaults to None. + + Returns: + List of Documents most similar to the query. 
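+
+        Example:
+            An illustrative call; the ``source`` metadata key is hypothetical
+            and only matches documents that were added with that key.
+
+            .. code-block:: python
+
+                docs = qdrant.similarity_search(
+                    "What is Qdrant?", k=4, filter={"source": "tweet"}
+                )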
+ """ + results = self.similarity_search_with_score(query, k, filter) + return list(map(itemgetter(0), results)) + + def similarity_search_with_score( + self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter by metadata. Defaults to None. + + Returns: + List of Documents most similar to the query and score for each + """ + embedding = self.embedding_function(query) + results = self.client.search( + collection_name=self.collection_name, + query_vector=embedding, + query_filter=self._qdrant_filter_from_dict(filter), + with_payload=True, + limit=k, + ) + return [ + ( + self._document_from_scored_point( + result, self.content_payload_key, self.metadata_payload_key + ), + result.score, + ) + for result in results + ] + + def max_marginal_relevance_search( + self, query: str, k: int = 4, fetch_k: int = 20 + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + embedding = self.embedding_function(query) + results = self.client.search( + collection_name=self.collection_name, + query_vector=embedding, + with_payload=True, + with_vectors=True, + limit=k, + ) + embeddings = [result.vector for result in results] + mmr_selected = maximal_marginal_relevance(embedding, embeddings, k=k) + return [ + self._document_from_scored_point( + results[i], self.content_payload_key, self.metadata_payload_key + ) + for i in mmr_selected + ] + + @classmethod + def from_documents( + cls, + documents: List[Document], + embedding: Embeddings, + url: Optional[str] = None, + port: Optional[int] = 6333, + grpc_port: int = 6334, + prefer_grpc: bool = False, + https: Optional[bool] = None, + api_key: Optional[str] = None, + prefix: Optional[str] = None, + timeout: Optional[float] = None, + host: Optional[str] = None, + collection_name: Optional[str] = None, + distance_func: str = "Cosine", + content_payload_key: str = CONTENT_KEY, + metadata_payload_key: str = METADATA_KEY, + **kwargs: Any, + ) -> "Qdrant": + return cast( + Qdrant, + super().from_documents( + documents, + embedding, + url=url, + port=port, + grpc_port=grpc_port, + prefer_grpc=prefer_grpc, + https=https, + api_key=api_key, + prefix=prefix, + timeout=timeout, + host=host, + collection_name=collection_name, + distance_func=distance_func, + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + **kwargs, + ), + ) + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + url: Optional[str] = None, + port: Optional[int] = 6333, + grpc_port: int = 6334, + prefer_grpc: bool = False, + https: Optional[bool] = None, + api_key: Optional[str] = None, + prefix: Optional[str] = None, + timeout: Optional[float] = None, + host: Optional[str] = None, + collection_name: Optional[str] = None, + distance_func: str = "Cosine", + content_payload_key: str = CONTENT_KEY, + metadata_payload_key: str = METADATA_KEY, + **kwargs: Any, + ) -> "Qdrant": + """Construct Qdrant wrapper 
from raw documents. + + Args: + texts: A list of texts to be indexed in Qdrant. + embedding: A subclass of `Embeddings`, responsible for text vectorization. + metadatas: + An optional list of metadata. If provided it has to be of the same + length as a list of texts. + url: either host or str of "Optional[scheme], host, Optional[port], + Optional[prefix]". Default: `None` + port: Port of the REST API interface. Default: 6333 + grpc_port: Port of the gRPC interface. Default: 6334 + prefer_grpc: + If `true` - use gPRC interface whenever possible in custom methods. + https: If `true` - use HTTPS(SSL) protocol. Default: `None` + api_key: API key for authentication in Qdrant Cloud. Default: `None` + prefix: + If not `None` - add `prefix` to the REST URL path. + Example: `service/v1` will result in + `http://localhost:6333/service/v1/{qdrant-endpoint}` for REST API. + Default: `None` + timeout: + Timeout for REST and gRPC API requests. + Default: 5.0 seconds for REST and unlimited for gRPC + host: + Host name of Qdrant service. If url and host are None, set to + 'localhost'. Default: `None` + collection_name: + Name of the Qdrant collection to be used. If not provided, + will be created randomly. + distance_func: + Distance function. One of the: "Cosine" / "Euclid" / "Dot". + content_payload_key: + A payload key used to store the content of the document. + metadata_payload_key: + A payload key used to store the metadata of the document. + **kwargs: + Additional arguments passed directly into REST client initialization + + This is a user friendly interface that: + 1. Embeds documents. + 2. Creates an in memory docstore + 3. Initializes the Qdrant database + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain import Qdrant + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + qdrant = Qdrant.from_texts(texts, embeddings, "localhost") + """ + try: + import qdrant_client + except ImportError: + raise ValueError( + "Could not import qdrant-client python package. " + "Please install it with `pip install qdrant-client`." 
+ ) + + from qdrant_client.http import models as rest + + # Just do a single quick embedding to get vector size + partial_embeddings = embedding.embed_documents(texts[:1]) + vector_size = len(partial_embeddings[0]) + + collection_name = collection_name or uuid.uuid4().hex + distance_func = distance_func.upper() + + client = qdrant_client.QdrantClient( + url=url, + port=port, + grpc_port=grpc_port, + prefer_grpc=prefer_grpc, + https=https, + api_key=api_key, + prefix=prefix, + timeout=timeout, + host=host, + **kwargs, + ) + + client.recreate_collection( + collection_name=collection_name, + vectors_config=rest.VectorParams( + size=vector_size, + distance=rest.Distance[distance_func], + ), + ) + + # Now generate the embeddings for all the texts + embeddings = embedding.embed_documents(texts) + + client.upsert( + collection_name=collection_name, + points=rest.Batch( + ids=[uuid.uuid4().hex for _ in texts], + vectors=embeddings, + payloads=cls._build_payloads( + texts, metadatas, content_payload_key, metadata_payload_key + ), + ), + ) + + return cls( + client=client, + collection_name=collection_name, + embedding_function=embedding.embed_query, + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + ) + + @classmethod + def _build_payloads( + cls, + texts: Iterable[str], + metadatas: Optional[List[dict]], + content_payload_key: str, + metadata_payload_key: str, + ) -> List[dict]: + payloads = [] + for i, text in enumerate(texts): + if text is None: + raise ValueError( + "At least one of the texts is None. Please remove it before " + "calling .from_texts or .add_texts on Qdrant instance." + ) + metadata = metadatas[i] if metadatas is not None else None + payloads.append( + { + content_payload_key: text, + metadata_payload_key: metadata, + } + ) + + return payloads + + @classmethod + def _document_from_scored_point( + cls, + scored_point: Any, + content_payload_key: str, + metadata_payload_key: str, + ) -> Document: + return Document( + page_content=scored_point.payload.get(content_payload_key), + metadata=scored_point.payload.get(metadata_payload_key) or {}, + ) + + def _qdrant_filter_from_dict(self, filter: Optional[MetadataFilter]) -> Any: + if filter is None or 0 == len(filter): + return None + + from qdrant_client.http import models as rest + + return rest.Filter( + must=[ + rest.FieldCondition( + key=f"{self.metadata_payload_key}.{key}", + match=rest.MatchValue(value=value), + ) + for key, value in filter.items() + ] + ) diff --git a/langchain/vectorstores/redis.py b/langchain/vectorstores/redis.py new file mode 100644 index 0000000000000000000000000000000000000000..1729ebff8c9653e2a3a8293fb7a97909d0bab956 --- /dev/null +++ b/langchain/vectorstores/redis.py @@ -0,0 +1,292 @@ +"""Wrapper around Redis vector database.""" +from __future__ import annotations + +import json +import logging +import uuid +from typing import Any, Callable, Iterable, List, Mapping, Optional + +import numpy as np +from redis.client import Redis as RedisType + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env +from langchain.vectorstores.base import VectorStore + +logger = logging.getLogger() + + +def _check_redis_module_exist(client: RedisType, module: str) -> bool: + return module in [m["name"] for m in client.info().get("modules", {"name": ""})] + + +class Redis(VectorStore): + def __init__( + self, + redis_url: str, + index_name: str, + embedding_function: Callable, + **kwargs: Any, 
+ ): + """Initialize with necessary components.""" + try: + import redis + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + + self.embedding_function = embedding_function + self.index_name = index_name + try: + redis_client = redis.from_url(redis_url, **kwargs) + except ValueError as e: + raise ValueError(f"Your redis connected error: {e}") + + # check if redis add redisearch module + if not _check_redis_module_exist(redis_client, "search"): + raise ValueError( + "Could not use redis directly, you need to add search module" + "Please refer [RediSearch](https://redis.io/docs/stack/search/quick_start/)" # noqa + ) + + self.client = redis_client + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + # `prefix`: Maybe in the future we can let the user choose the index_name. + prefix = "doc" # prefix for the document keys + + ids = [] + # Check if index exists + for i, text in enumerate(texts): + key = f"{prefix}:{uuid.uuid4().hex}" + metadata = metadatas[i] if metadatas else {} + self.client.hset( + key, + mapping={ + "content": text, + "content_vector": np.array( + self.embedding_function(text), dtype=np.float32 + ).tobytes(), + "metadata": json.dumps(metadata), + }, + ) + ids.append(key) + return ids + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + try: + from redis.commands.search.query import Query + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + + # Creates embedding vector from user query + embedding = self.embedding_function(query) + + # Prepare the Query + return_fields = ["metadata", "content", "vector_score"] + vector_field = "content_vector" + hybrid_fields = "*" + base_query = ( + f"{hybrid_fields}=>[KNN {k} @{vector_field} $vector AS vector_score]" + ) + redis_query = ( + Query(base_query) + .return_fields(*return_fields) + .sort_by("vector_score") + .paging(0, k) + .dialect(2) + ) + params_dict: Mapping[str, str] = { + "vector": np.array(embedding) # type: ignore + .astype(dtype=np.float32) + .tobytes() + } + + # perform vector search + results = self.client.ft(self.index_name).search(redis_query, params_dict) + + documents = [ + Document(page_content=result.content, metadata=json.loads(result.metadata)) + for result in results.docs + ] + + return documents + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> Redis: + """Construct RediSearch wrapper from raw documents. + This is a user-friendly interface that: + 1. Embeds documents. + 2. Creates a new index for the embeddings in the RediSearch instance. + 3. Adds the documents to the newly created RediSearch index. + This is intended to be a quick way to get started. + Example: + .. 
code-block:: python + from langchain import RediSearch + from langchain.embeddings import OpenAIEmbeddings + embeddings = OpenAIEmbeddings() + redisearch = RediSearch.from_texts( + texts, + embeddings, + redis_url="redis://username:password@localhost:6379" + ) + """ + redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") + try: + import redis + from redis.commands.search.field import TextField, VectorField + from redis.commands.search.indexDefinition import IndexDefinition, IndexType + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + try: + # We need to first remove redis_url from kwargs, + # otherwise passing it to Redis will result in an error. + kwargs.pop("redis_url") + client = redis.from_url(url=redis_url, **kwargs) + except ValueError as e: + raise ValueError(f"Your redis connected error: {e}") + + # check if redis add redisearch module + if not _check_redis_module_exist(client, "search"): + raise ValueError( + "Could not use redis directly, you need to add search module" + "Please refer [RediSearch](https://redis.io/docs/stack/search/quick_start/)" # noqa + ) + + embeddings = embedding.embed_documents(texts) + dim = len(embeddings[0]) + # Constants + vector_number = len(embeddings) # initial number of vectors + # name of the search index if not given + if not index_name: + index_name = uuid.uuid4().hex + prefix = f"doc:{index_name}" # prefix for the document keys + distance_metric = ( + "COSINE" # distance metric for the vectors (ex. COSINE, IP, L2) + ) + content = TextField(name="content") + metadata = TextField(name="metadata") + content_embedding = VectorField( + "content_vector", + "FLAT", + { + "TYPE": "FLOAT32", + "DIM": dim, + "DISTANCE_METRIC": distance_metric, + "INITIAL_CAP": vector_number, + }, + ) + fields = [content, metadata, content_embedding] + + # Check if index exists + try: + client.ft(index_name).info() + logger.info("Index already exists") + except: # noqa + # Create Redis Index + client.ft(index_name).create_index( + fields=fields, + definition=IndexDefinition(prefix=[prefix], index_type=IndexType.HASH), + ) + + pipeline = client.pipeline() + for i, text in enumerate(texts): + key = f"{prefix}:{i}" + metadata = metadatas[i] if metadatas else {} + pipeline.hset( + key, + mapping={ + "content": text, + "content_vector": np.array( + embeddings[i], dtype=np.float32 + ).tobytes(), + "metadata": json.dumps(metadata), + }, + ) + pipeline.execute() + return cls(redis_url, index_name, embedding.embed_query) + + @staticmethod + def drop_index( + index_name: str, + delete_documents: bool, + **kwargs: Any, + ) -> bool: + redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") + try: + import redis + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + try: + # We need to first remove redis_url from kwargs, + # otherwise passing it to Redis will result in an error. 
+ kwargs.pop("redis_url") + client = redis.from_url(url=redis_url, **kwargs) + except ValueError as e: + raise ValueError(f"Your redis connected error: {e}") + # Check if index exists + try: + client.ft(index_name).dropindex(delete_documents) + logger.info("Drop index") + return True + except: # noqa + # Index not exist + return False + + @classmethod + def from_existing_index( + cls, + embedding: Embeddings, + index_name: str, + **kwargs: Any, + ) -> Redis: + redis_url = get_from_dict_or_env(kwargs, "redis_url", "REDIS_URL") + try: + import redis + except ImportError: + raise ValueError( + "Could not import redis python package. " + "Please install it with `pip install redis`." + ) + try: + # We need to first remove redis_url from kwargs, + # otherwise passing it to Redis will result in an error. + kwargs.pop("redis_url") + client = redis.from_url(url=redis_url, **kwargs) + except ValueError as e: + raise ValueError(f"Your redis connected error: {e}") + + # check if redis add redisearch module + if not _check_redis_module_exist(client, "search"): + raise ValueError( + "Could not use redis directly, you need to add search module" + "Please refer [RediSearch](https://redis.io/docs/stack/search/quick_start/)" # noqa + ) + + return cls(redis_url, index_name, embedding.embed_query) diff --git a/langchain/vectorstores/utils.py b/langchain/vectorstores/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e34a7703a7eada1ecdd75f57acbd32eab2da2e4a --- /dev/null +++ b/langchain/vectorstores/utils.py @@ -0,0 +1,38 @@ +"""Utility functions for working with vectors and vectorstores.""" + +from typing import List + +import numpy as np + + +def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: + """Calculate cosine similarity with numpy.""" + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + + +def maximal_marginal_relevance( + query_embedding: np.ndarray, + embedding_list: list, + lambda_mult: float = 0.5, + k: int = 4, +) -> List[int]: + """Calculate maximal marginal relevance.""" + idxs: List[int] = [] + while len(idxs) < k: + best_score = -np.inf + idx_to_add = -1 + for i, emb in enumerate(embedding_list): + if i in idxs: + continue + first_part = cosine_similarity(query_embedding, emb) + second_part = 0.0 + for j in idxs: + cos_sim = cosine_similarity(emb, embedding_list[j]) + if cos_sim > second_part: + second_part = cos_sim + equation_score = lambda_mult * first_part - (1 - lambda_mult) * second_part + if equation_score > best_score: + best_score = equation_score + idx_to_add = i + idxs.append(idx_to_add) + return idxs diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py new file mode 100644 index 0000000000000000000000000000000000000000..29e67ced0a2631c1974f31d589600a0781abd06e --- /dev/null +++ b/langchain/vectorstores/weaviate.py @@ -0,0 +1,101 @@ +"""Wrapper around weaviate vector database.""" +from __future__ import annotations + +from typing import Any, Dict, Iterable, List, Optional +from uuid import uuid4 + +from langchain.docstore.document import Document +from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore + + +class Weaviate(VectorStore): + """Wrapper around Weaviate vector database. + + To use, you should have the ``weaviate-client`` python package installed. + + Example: + .. code-block:: python + + import weaviate + from langchain.vectorstores import Weaviate + client = weaviate.Client(url=os.environ["WEAVIATE_URL"], ...) 
+ weaviate = Weaviate(client, index_name, text_key) + + """ + + def __init__( + self, + client: Any, + index_name: str, + text_key: str, + attributes: Optional[List[str]] = None, + ): + """Initialize with Weaviate client.""" + try: + import weaviate + except ImportError: + raise ValueError( + "Could not import weaviate python package. " + "Please install it with `pip install weaviate-client`." + ) + if not isinstance(client, weaviate.Client): + raise ValueError( + f"client should be an instance of weaviate.Client, got {type(client)}" + ) + self._client = client + self._index_name = index_name + self._text_key = text_key + self._query_attrs = [self._text_key] + if attributes is not None: + self._query_attrs.extend(attributes) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Upload texts with metadata (properties) to Weaviate.""" + from weaviate.util import get_valid_uuid + + with self._client.batch as batch: + ids = [] + for i, doc in enumerate(texts): + data_properties = { + self._text_key: doc, + } + if metadatas is not None: + for key in metadatas[i].keys(): + data_properties[key] = metadatas[i][key] + + _id = get_valid_uuid(uuid4()) + batch.add_data_object(data_properties, self._index_name, _id) + ids.append(_id) + return ids + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Look up similar documents in weaviate.""" + content: Dict[str, Any] = {"concepts": [query]} + if kwargs.get("search_distance"): + content["certainty"] = kwargs.get("search_distance") + query_obj = self._client.query.get(self._index_name, self._query_attrs) + result = query_obj.with_near_text(content).with_limit(k).do() + docs = [] + for res in result["data"]["Get"][self._index_name]: + text = res.pop(self._text_key) + docs.append(Document(page_content=text, metadata=res)) + return docs + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> VectorStore: + """Not implemented for Weaviate yet.""" + raise NotImplementedError("weaviate does not currently support `from_texts`.") diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000000000000000000000000000000000000..0cac6b67ca5eba7ccadb8b9fe54d03cee3478c5f --- /dev/null +++ b/poetry.lock @@ -0,0 +1,7547 @@ +# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. + +[[package]] +name = "absl-py" +version = "1.4.0" +description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py." 
+category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"}, + {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"}, +] + +[[package]] +name = "aioboto3" +version = "10.4.0" +description = "Async boto3 wrapper" +category = "main" +optional = true +python-versions = ">=3.7,<4.0" +files = [ + {file = "aioboto3-10.4.0-py3-none-any.whl", hash = "sha256:6d0f0bf6af0168c27828e108f1a24182669a6ea6939437c27638caf06a693403"}, + {file = "aioboto3-10.4.0.tar.gz", hash = "sha256:e52b5f96b67031ddcbabcc55015bad3f851d3d4e6d5bfc7a1d1518d90e0c1fd8"}, +] + +[package.dependencies] +aiobotocore = {version = "2.4.2", extras = ["boto3"]} + +[package.extras] +chalice = ["chalice (>=1.24.0)"] +s3cse = ["cryptography (>=2.3.1)"] + +[[package]] +name = "aiobotocore" +version = "2.4.2" +description = "Async client for aws services using botocore and aiohttp" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "aiobotocore-2.4.2-py3-none-any.whl", hash = "sha256:4acd1ebe2e44be4b100aa553910bda899f6dc090b3da2bc1cf3d5de2146ed208"}, + {file = "aiobotocore-2.4.2.tar.gz", hash = "sha256:0603b74a582dffa7511ce7548d07dc9b10ec87bc5fb657eb0b34f9bd490958bf"}, +] + +[package.dependencies] +aiohttp = ">=3.3.1" +aioitertools = ">=0.5.1" +boto3 = {version = ">=1.24.59,<1.24.60", optional = true, markers = "extra == \"boto3\""} +botocore = ">=1.27.59,<1.27.60" +wrapt = ">=1.10.10" + +[package.extras] +awscli = ["awscli (>=1.25.60,<1.25.61)"] +boto3 = ["boto3 (>=1.24.59,<1.24.60)"] + +[[package]] +name = "aiodns" +version = "3.0.0" +description = "Simple DNS resolver for asyncio" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "aiodns-3.0.0-py3-none-any.whl", hash = "sha256:2b19bc5f97e5c936638d28e665923c093d8af2bf3aa88d35c43417fa25d136a2"}, + {file = "aiodns-3.0.0.tar.gz", hash = "sha256:946bdfabe743fceeeb093c8a010f5d1645f708a241be849e17edfb0e49e08cd6"}, +] + +[package.dependencies] +pycares = ">=4.0.0" + +[[package]] +name = "aiohttp" +version = "3.8.4" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ce45967538fb747370308d3145aa68a074bdecb4f3a300869590f725ced69c1"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b744c33b6f14ca26b7544e8d8aadff6b765a80ad6164fb1a430bbadd593dfb1a"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a45865451439eb320784918617ba54b7a377e3501fb70402ab84d38c2cd891b"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86d42d7cba1cec432d47ab13b6637bee393a10f664c425ea7b305d1301ca1a3"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee3c36df21b5714d49fc4580247947aa64bcbe2939d1b77b4c8dcb8f6c9faecc"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:176a64b24c0935869d5bbc4c96e82f89f643bcdf08ec947701b9dbb3c956b7dd"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c844fd628851c0bc309f3c801b3a3d58ce430b2ce5b359cd918a5a76d0b20cb5"}, + {file = 
"aiohttp-3.8.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5393fb786a9e23e4799fec788e7e735de18052f83682ce2dfcabaf1c00c2c08e"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e4b09863aae0dc965c3ef36500d891a3ff495a2ea9ae9171e4519963c12ceefd"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:adfbc22e87365a6e564c804c58fc44ff7727deea782d175c33602737b7feadb6"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:147ae376f14b55f4f3c2b118b95be50a369b89b38a971e80a17c3fd623f280c9"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:eafb3e874816ebe2a92f5e155f17260034c8c341dad1df25672fb710627c6949"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6cc15d58053c76eacac5fa9152d7d84b8d67b3fde92709195cb984cfb3475ea"}, + {file = "aiohttp-3.8.4-cp310-cp310-win32.whl", hash = "sha256:59f029a5f6e2d679296db7bee982bb3d20c088e52a2977e3175faf31d6fb75d1"}, + {file = "aiohttp-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:fe7ba4a51f33ab275515f66b0a236bcde4fb5561498fe8f898d4e549b2e4509f"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d8ef1a630519a26d6760bc695842579cb09e373c5f227a21b67dc3eb16cfea4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b3f2e06a512e94722886c0827bee9807c86a9f698fac6b3aee841fab49bbfb4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a80464982d41b1fbfe3154e440ba4904b71c1a53e9cd584098cd41efdb188ef"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b631e26df63e52f7cce0cce6507b7a7f1bc9b0c501fcde69742130b32e8782f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f43255086fe25e36fd5ed8f2ee47477408a73ef00e804cb2b5cba4bf2ac7f5e"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d347a172f866cd1d93126d9b239fcbe682acb39b48ee0873c73c933dd23bd0f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3fec6a4cb5551721cdd70473eb009d90935b4063acc5f40905d40ecfea23e05"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80a37fe8f7c1e6ce8f2d9c411676e4bc633a8462844e38f46156d07a7d401654"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d1e6a862b76f34395a985b3cd39a0d949ca80a70b6ebdea37d3ab39ceea6698a"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd468460eefef601ece4428d3cf4562459157c0f6523db89365202c31b6daebb"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:618c901dd3aad4ace71dfa0f5e82e88b46ef57e3239fc7027773cb6d4ed53531"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:652b1bff4f15f6287550b4670546a2947f2a4575b6c6dff7760eafb22eacbf0b"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80575ba9377c5171407a06d0196b2310b679dc752d02a1fcaa2bc20b235dbf24"}, + {file = "aiohttp-3.8.4-cp311-cp311-win32.whl", hash = "sha256:bbcf1a76cf6f6dacf2c7f4d2ebd411438c275faa1dc0c68e46eb84eebd05dd7d"}, + {file = "aiohttp-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:6e74dd54f7239fcffe07913ff8b964e28b712f09846e20de78676ce2a3dc0bfc"}, + {file = 
"aiohttp-3.8.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:880e15bb6dad90549b43f796b391cfffd7af373f4646784795e20d92606b7a51"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb96fa6b56bb536c42d6a4a87dfca570ff8e52de2d63cabebfd6fb67049c34b6"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a6cadebe132e90cefa77e45f2d2f1a4b2ce5c6b1bfc1656c1ddafcfe4ba8131"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f352b62b45dff37b55ddd7b9c0c8672c4dd2eb9c0f9c11d395075a84e2c40f75"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ab43061a0c81198d88f39aaf90dae9a7744620978f7ef3e3708339b8ed2ef01"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9cb1565a7ad52e096a6988e2ee0397f72fe056dadf75d17fa6b5aebaea05622"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:1b3ea7edd2d24538959c1c1abf97c744d879d4e541d38305f9bd7d9b10c9ec41"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:7c7837fe8037e96b6dd5cfcf47263c1620a9d332a87ec06a6ca4564e56bd0f36"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3b90467ebc3d9fa5b0f9b6489dfb2c304a1db7b9946fa92aa76a831b9d587e99"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:cab9401de3ea52b4b4c6971db5fb5c999bd4260898af972bf23de1c6b5dd9d71"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d1f9282c5f2b5e241034a009779e7b2a1aa045f667ff521e7948ea9b56e0c5ff"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win32.whl", hash = "sha256:5e14f25765a578a0a634d5f0cd1e2c3f53964553a00347998dfdf96b8137f777"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win_amd64.whl", hash = "sha256:4c745b109057e7e5f1848c689ee4fb3a016c8d4d92da52b312f8a509f83aa05e"}, + {file = "aiohttp-3.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:aede4df4eeb926c8fa70de46c340a1bc2c6079e1c40ccf7b0eae1313ffd33519"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ddaae3f3d32fc2cb4c53fab020b69a05c8ab1f02e0e59665c6f7a0d3a5be54f"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4eb3b82ca349cf6fadcdc7abcc8b3a50ab74a62e9113ab7a8ebc268aad35bb9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bcb89336efa095ea21b30f9e686763f2be4478f1b0a616969551982c4ee4c3b"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c08e8ed6fa3d477e501ec9db169bfac8140e830aa372d77e4a43084d8dd91ab"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6cd05ea06daca6ad6a4ca3ba7fe7dc5b5de063ff4daec6170ec0f9979f6c332"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7a00a9ed8d6e725b55ef98b1b35c88013245f35f68b1b12c5cd4100dddac333"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:de04b491d0e5007ee1b63a309956eaed959a49f5bb4e84b26c8f5d49de140fa9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:40653609b3bf50611356e6b6554e3a331f6879fa7116f3959b20e3528783e699"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:dbf3a08a06b3f433013c143ebd72c15cac33d2914b8ea4bea7ac2c23578815d6"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854f422ac44af92bfe172d8e73229c270dc09b96535e8a548f99c84f82dde241"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win32.whl", hash = "sha256:aeb29c84bb53a84b1a81c6c09d24cf33bb8432cc5c39979021cc0f98c1292a1a"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:db3fc6120bce9f446d13b1b834ea5b15341ca9ff3f335e4a951a6ead31105480"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fabb87dd8850ef0f7fe2b366d44b77d7e6fa2ea87861ab3844da99291e81e60f"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91f6d540163f90bbaef9387e65f18f73ffd7c79f5225ac3d3f61df7b0d01ad15"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d265f09a75a79a788237d7f9054f929ced2e69eb0bb79de3798c468d8a90f945"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d89efa095ca7d442a6d0cbc755f9e08190ba40069b235c9886a8763b03785da"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dac314662f4e2aa5009977b652d9b8db7121b46c38f2073bfeed9f4049732cd"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe11310ae1e4cd560035598c3f29d86cef39a83d244c7466f95c27ae04850f10"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ddb2a2026c3f6a68c3998a6c47ab6795e4127315d2e35a09997da21865757f8"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e75b89ac3bd27d2d043b234aa7b734c38ba1b0e43f07787130a0ecac1e12228a"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6e601588f2b502c93c30cd5a45bfc665faaf37bbe835b7cfd461753068232074"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a5d794d1ae64e7753e405ba58e08fcfa73e3fad93ef9b7e31112ef3c9a0efb52"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a1f4689c9a1462f3df0a1f7e797791cd6b124ddbee2b570d34e7f38ade0e2c71"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3032dcb1c35bc330134a5b8a5d4f68c1a87252dfc6e1262c65a7e30e62298275"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8189c56eb0ddbb95bfadb8f60ea1b22fcfa659396ea36f6adcc521213cd7b44d"}, + {file = "aiohttp-3.8.4-cp38-cp38-win32.whl", hash = "sha256:33587f26dcee66efb2fff3c177547bd0449ab7edf1b73a7f5dea1e38609a0c54"}, + {file = "aiohttp-3.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:e595432ac259af2d4630008bf638873d69346372d38255774c0e286951e8b79f"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a7bdf9e57126dc345b683c3632e8ba317c31d2a41acd5800c10640387d193ed"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:22f6eab15b6db242499a16de87939a342f5a950ad0abaf1532038e2ce7d31567"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7235604476a76ef249bd64cb8274ed24ccf6995c4a8b51a237005ee7a57e8643"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea9eb976ffdd79d0e893869cfe179a8f60f152d42cb64622fca418cd9b18dc2a"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92c0cea74a2a81c4c76b62ea1cac163ecb20fb3ba3a75c909b9fa71b4ad493cf"}, + {file 
= "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493f5bc2f8307286b7799c6d899d388bbaa7dfa6c4caf4f97ef7521b9cb13719"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a63f03189a6fa7c900226e3ef5ba4d3bd047e18f445e69adbd65af433add5a2"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10c8cefcff98fd9168cdd86c4da8b84baaa90bf2da2269c6161984e6737bf23e"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bca5f24726e2919de94f047739d0a4fc01372801a3672708260546aa2601bf57"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:03baa76b730e4e15a45f81dfe29a8d910314143414e528737f8589ec60cf7391"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8c29c77cc57e40f84acef9bfb904373a4e89a4e8b74e71aa8075c021ec9078c2"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:03543dcf98a6619254b409be2d22b51f21ec66272be4ebda7b04e6412e4b2e14"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17b79c2963db82086229012cff93ea55196ed31f6493bb1ccd2c62f1724324e4"}, + {file = "aiohttp-3.8.4-cp39-cp39-win32.whl", hash = "sha256:34ce9f93a4a68d1272d26030655dd1b58ff727b3ed2a33d80ec433561b03d67a"}, + {file = "aiohttp-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:41a86a69bb63bb2fc3dc9ad5ea9f10f1c9c8e282b471931be0268ddd09430b04"}, + {file = "aiohttp-3.8.4.tar.gz", hash = "sha256:bf2e1a9162c1e441bf805a1fd166e249d574ca04e03b34f97e2928769e91ab5c"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiohttp-retry" +version = "2.8.3" +description = "Simple retry client for aiohttp" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "aiohttp_retry-2.8.3-py3-none-any.whl", hash = "sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45"}, + {file = "aiohttp_retry-2.8.3.tar.gz", hash = "sha256:9a8e637e31682ad36e1ff9f8bcba912fcfc7d7041722bc901a4b948da4d71ea9"}, +] + +[package.dependencies] +aiohttp = "*" + +[[package]] +name = "aioitertools" +version = "0.11.0" +description = "itertools and builtins for AsyncIO and mixed iterables" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, + {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, +] + +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.10\""} + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "alabaster" +version = "0.7.13" +description = "A configurable 
sidebar-enabled Sphinx theme" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"}, + {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"}, +] + +[[package]] +name = "aleph-alpha-client" +version = "2.16.1" +description = "python client to interact with Aleph Alpha api endpoints" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "aleph-alpha-client-2.16.1.tar.gz", hash = "sha256:5e83a84326da1f32adb32702d9d0e3993e7d79b8aad8dc2301d1dd1866fcf6ed"}, + {file = "aleph_alpha_client-2.16.1-py3-none-any.whl", hash = "sha256:59120218b695bb73dd9f2bda6630740808ad4d10bea82b1b988856d487b1f192"}, +] + +[package.dependencies] +aiodns = ">=3.0.0" +aiohttp = ">=3.8.3" +aiohttp-retry = ">=2.8.3" +requests = ">=2.28" +tokenizers = ">=0.13.2" +urllib3 = ">=1.26" + +[package.extras] +dev = ["black", "ipykernel", "mypy", "nbconvert", "pytest", "pytest-aiohttp", "pytest-cov", "pytest-dotenv", "pytest-httpserver", "types-requests"] +docs = ["sphinx", "sphinx-rtd-theme"] +test = ["pytest", "pytest-aiohttp", "pytest-cov", "pytest-dotenv", "pytest-httpserver"] +types = ["mypy", "types-requests"] + +[[package]] +name = "anthropic" +version = "0.2.3" +description = "Library for accessing the anthropic API" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "anthropic-0.2.3-py3-none-any.whl", hash = "sha256:51cc9e3c5c0fc39b62af64b0607fd0da1622c7815fed89d0a52d80ebe0e60f3a"}, + {file = "anthropic-0.2.3.tar.gz", hash = "sha256:3d4f8d21c54d23d476d5ef72510b50126108f9b0bdc45b9d5d2e2b34204d56ad"}, +] + +[package.dependencies] +httpx = "*" +requests = "*" +tokenizers = "*" + +[package.extras] +dev = ["black (>=22.3.0)", "pytest"] + +[[package]] +name = "anyio" +version = "3.6.2" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"}, + {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"] +trio = ["trio (>=0.16,<0.22)"] + +[[package]] +name = "appnope" +version = "0.1.3" +description = "Disable App Nap on macOS >= 10.9" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"}, + {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, +] + +[[package]] +name = "argon2-cffi" +version = "21.3.0" +description = "The secure Argon2 password hashing algorithm." 
+category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "argon2-cffi-21.3.0.tar.gz", hash = "sha256:d384164d944190a7dd7ef22c6aa3ff197da12962bd04b17f64d4e93d934dba5b"}, + {file = "argon2_cffi-21.3.0-py3-none-any.whl", hash = "sha256:8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80"}, +] + +[package.dependencies] +argon2-cffi-bindings = "*" + +[package.extras] +dev = ["cogapp", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "pre-commit", "pytest", "sphinx", "sphinx-notfound-page", "tomli"] +docs = ["furo", "sphinx", "sphinx-notfound-page"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"] + +[[package]] +name = "argon2-cffi-bindings" +version = "21.2.0" +description = "Low-level CFFI bindings for Argon2" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082"}, + {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f"}, + {file = "argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351"}, + {file = "argon2_cffi_bindings-21.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7"}, 
+ {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb"}, + {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"}, +] + +[package.dependencies] +cffi = ">=1.0.1" + +[package.extras] +dev = ["cogapp", "pre-commit", "pytest", "wheel"] +tests = ["pytest"] + +[[package]] +name = "arrow" +version = "1.2.3" +description = "Better dates & times for Python" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"}, + {file = "arrow-1.2.3.tar.gz", hash = "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1"}, +] + +[package.dependencies] +python-dateutil = ">=2.7.0" + +[[package]] +name = "asttokens" +version = "2.2.1" +description = "Annotate AST trees with source code positions" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "asttokens-2.2.1-py2.py3-none-any.whl", hash = "sha256:6b0ac9e93fb0335014d382b8fa9b3afa7df546984258005da0b9e7095b3deb1c"}, + {file = "asttokens-2.2.1.tar.gz", hash = "sha256:4622110b2a6f30b77e1473affaa97e711bc2f07d3f10848420ff1898edbe94f3"}, +] + +[package.dependencies] +six = "*" + +[package.extras] +test = ["astroid", "pytest"] + +[[package]] +name = "astunparse" +version = "1.6.3" +description = "An AST unparser for Python" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, + {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, +] + +[package.dependencies] +six = ">=1.6.1,<2.0" +wheel = ">=0.23.0,<1.0" + +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + +[[package]] +name = "attrs" +version = "22.2.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]"] 
+docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] +tests = ["attrs[tests-no-zope]", "zope.interface"] +tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] + +[[package]] +name = "authlib" +version = "1.2.0" +description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "Authlib-1.2.0-py2.py3-none-any.whl", hash = "sha256:4ddf4fd6cfa75c9a460b361d4bd9dac71ffda0be879dbe4292a02e92349ad55a"}, + {file = "Authlib-1.2.0.tar.gz", hash = "sha256:4fa3e80883a5915ef9f5bc28630564bc4ed5b5af39812a3ff130ec76bd631e9d"}, +] + +[package.dependencies] +cryptography = ">=3.2" + +[[package]] +name = "autodoc-pydantic" +version = "1.8.0" +description = "Seamlessly integrate pydantic models in your Sphinx documentation." +category = "dev" +optional = false +python-versions = ">=3.6,<4.0.0" +files = [ + {file = "autodoc_pydantic-1.8.0-py3-none-any.whl", hash = "sha256:f1bf9318f37369fec906ab523ebe65c1894395a6fc859dbc6fd02ffd90d3242f"}, + {file = "autodoc_pydantic-1.8.0.tar.gz", hash = "sha256:77da1cbbe4434fa9963f85a1555c63afff9a4acec06b318dc4f54c4f28a04f2c"}, +] + +[package.dependencies] +pydantic = ">=1.5" +Sphinx = ">=3.4" + +[package.extras] +dev = ["coverage (>=5,<6)", "flake8 (>=3,<4)", "pytest (>=6,<7)", "sphinx-copybutton (>=0.4,<0.5)", "sphinx-rtd-theme (>=1.0,<2.0)", "sphinx-tabs (>=3,<4)", "sphinxcontrib-mermaid (>=0.7,<0.8)", "tox (>=3,<4)"] +docs = ["sphinx-copybutton (>=0.4,<0.5)", "sphinx-rtd-theme (>=1.0,<2.0)", "sphinx-tabs (>=3,<4)", "sphinxcontrib-mermaid (>=0.7,<0.8)"] +test = ["coverage (>=5,<6)", "pytest (>=6,<7)"] + +[[package]] +name = "babel" +version = "2.12.1" +description = "Internationalization utilities" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"}, + {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"}, +] + +[package.dependencies] +pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""} + +[[package]] +name = "backcall" +version = "0.2.0" +description = "Specifications for callback functions passed in to an API" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, + {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.0" +description = "Screen-scraping library" +category = "main" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.0-py3-none-any.whl", hash = "sha256:2130a5ad7f513200fae61a17abb5e338ca980fa28c439c0571014bc0217e9591"}, + {file = "beautifulsoup4-4.12.0.tar.gz", hash = "sha256:c5fceeaec29d09c84970e47c65f2f0efe57872f7cff494c9691a26ec0ff13234"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "black" +version = "23.1.0" +description = "The 
uncompromising code formatter." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"}, + {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"}, + {file = "black-23.1.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b"}, + {file = "black-23.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"}, + {file = "black-23.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648"}, + {file = "black-23.1.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958"}, + {file = "black-23.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a"}, + {file = "black-23.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481"}, + {file = "black-23.1.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad"}, + {file = "black-23.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8"}, + {file = "black-23.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd"}, + {file = "black-23.1.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580"}, + {file = "black-23.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468"}, + {file = "black-23.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06"}, + {file = "black-23.1.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739"}, + {file = "black-23.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9"}, + {file = "black-23.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555"}, + {file = "black-23.1.0-py3-none-any.whl", hash = 
"sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32"}, + {file = "black-23.1.0.tar.gz", hash = "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "bleach" +version = "6.0.0" +description = "An easy safelist-based HTML-sanitizing tool." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bleach-6.0.0-py3-none-any.whl", hash = "sha256:33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4"}, + {file = "bleach-6.0.0.tar.gz", hash = "sha256:1a1a85c1595e07d8db14c5f09f09e6433502c51c595970edc090551f0db99414"}, +] + +[package.dependencies] +six = ">=1.9.0" +webencodings = "*" + +[package.extras] +css = ["tinycss2 (>=1.1.0,<1.2)"] + +[[package]] +name = "blis" +version = "0.7.9" +description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "blis-0.7.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b3ea73707a7938304c08363a0b990600e579bfb52dece7c674eafac4bf2df9f7"}, + {file = "blis-0.7.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e85993364cae82707bfe7e637bee64ec96e232af31301e5c81a351778cb394b9"}, + {file = "blis-0.7.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d205a7e69523e2bacdd67ea906b82b84034067e0de83b33bd83eb96b9e844ae3"}, + {file = "blis-0.7.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9737035636452fb6d08e7ab79e5a9904be18a0736868a129179cd9f9ab59825"}, + {file = "blis-0.7.9-cp310-cp310-win_amd64.whl", hash = "sha256:d3882b4f44a33367812b5e287c0690027092830ffb1cce124b02f64e761819a4"}, + {file = "blis-0.7.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3dbb44311029263a6f65ed55a35f970aeb1d20b18bfac4c025de5aadf7889a8c"}, + {file = "blis-0.7.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fd5941bd5a21082b19d1dd0f6d62cd35609c25eb769aa3457d9877ef2ce37a9"}, + {file = "blis-0.7.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97ad55e9ef36e4ff06b35802d0cf7bfc56f9697c6bc9427f59c90956bb98377d"}, + {file = "blis-0.7.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7b6315d7b1ac5546bc0350f5f8d7cc064438d23db19a5c21aaa6ae7d93c1ab5"}, + {file = "blis-0.7.9-cp311-cp311-win_amd64.whl", hash = "sha256:5fd46c649acd1920482b4f5556d1c88693cba9bf6a494a020b00f14b42e1132f"}, + {file = "blis-0.7.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db2959560dcb34e912dad0e0d091f19b05b61363bac15d78307c01334a4e5d9d"}, + {file = "blis-0.7.9-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0521231bc95ab522f280da3bbb096299c910a62cac2376d48d4a1d403c54393"}, + {file = "blis-0.7.9-cp36-cp36m-win_amd64.whl", hash = "sha256:d811e88480203d75e6e959f313fdbf3326393b4e2b317067d952347f5c56216e"}, + {file = "blis-0.7.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5cb1db88ab629ccb39eac110b742b98e3511d48ce9caa82ca32609d9169a9c9c"}, + {file = 
"blis-0.7.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c399a03de4059bf8e700b921f9ff5d72b2a86673616c40db40cd0592051bdd07"}, + {file = "blis-0.7.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4eb70a79562a211bd2e6b6db63f1e2eed32c0ab3e9ef921d86f657ae8375845"}, + {file = "blis-0.7.9-cp37-cp37m-win_amd64.whl", hash = "sha256:3e3f95e035c7456a1f5f3b5a3cfe708483a00335a3a8ad2211d57ba4d5f749a5"}, + {file = "blis-0.7.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:179037cb5e6744c2e93b6b5facc6e4a0073776d514933c3db1e1f064a3253425"}, + {file = "blis-0.7.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0e82a6e0337d5231129a4e8b36978fa7b973ad3bb0257fd8e3714a9b35ceffd"}, + {file = "blis-0.7.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d12475e588a322e66a18346a3faa9eb92523504042e665c193d1b9b0b3f0482"}, + {file = "blis-0.7.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d5755ef37a573647be62684ca1545698879d07321f1e5b89a4fd669ce355eb0"}, + {file = "blis-0.7.9-cp38-cp38-win_amd64.whl", hash = "sha256:b8a1fcd2eb267301ab13e1e4209c165d172cdf9c0c9e08186a9e234bf91daa16"}, + {file = "blis-0.7.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8275f6b6eee714b85f00bf882720f508ed6a60974bcde489715d37fd35529da8"}, + {file = "blis-0.7.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7417667c221e29fe8662c3b2ff9bc201c6a5214bbb5eb6cc290484868802258d"}, + {file = "blis-0.7.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f4691bf62013eccc167c38a85c09a0bf0c6e3e80d4c2229cdf2668c1124eb0"}, + {file = "blis-0.7.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5cec812ee47b29107eb36af9b457be7191163eab65d61775ed63538232c59d5"}, + {file = "blis-0.7.9-cp39-cp39-win_amd64.whl", hash = "sha256:d81c3f627d33545fc25c9dcb5fee66c476d89288a27d63ac16ea63453401ffd5"}, + {file = "blis-0.7.9.tar.gz", hash = "sha256:29ef4c25007785a90ffc2f0ab3d3bd3b75cd2d7856a9a482b7d0dac8d511a09d"}, +] + +[package.dependencies] +numpy = ">=1.15.0" + +[[package]] +name = "boto3" +version = "1.24.59" +description = "The AWS SDK for Python" +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.24.59-py3-none-any.whl", hash = "sha256:34ab44146a2c4e7f4e72737f4b27e6eb5e0a7855c2f4599e3d9199b6a0a2d575"}, + {file = "boto3-1.24.59.tar.gz", hash = "sha256:a50b4323f9579cfe22fcf5531fbd40b567d4d74c1adce06aeb5c95fce2a6fb40"}, +] + +[package.dependencies] +botocore = ">=1.27.59,<1.28.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.27.59" +description = "Low-level, data-driven core of boto 3." 
+category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.27.59-py3-none-any.whl", hash = "sha256:69d756791fc024bda54f6c53f71ae34e695ee41bbbc1743d9179c4837a4929da"}, + {file = "botocore-1.27.59.tar.gz", hash = "sha256:eda4aed6ee719a745d1288eaf1beb12f6f6448ad1fa12f159405db14ba9c92cf"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.14.0)"] + +[[package]] +name = "cachetools" +version = "5.3.0" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = true +python-versions = "~=3.7" +files = [ + {file = "cachetools-5.3.0-py3-none-any.whl", hash = "sha256:429e1a1e845c008ea6c85aa35d4b98b65d6a9763eeef3e37e92728a12d1de9d4"}, + {file = "cachetools-5.3.0.tar.gz", hash = "sha256:13dfddc7b8df938c21a940dfa6557ce6e94a2f1cdfa58eb90c805721d58f2c14"}, +] + +[[package]] +name = "catalogue" +version = "2.0.8" +description = "Super lightweight function registries for your library" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "catalogue-2.0.8-py3-none-any.whl", hash = "sha256:2d786e229d8d202b4f8a2a059858e45a2331201d831e39746732daa704b99f69"}, + {file = "catalogue-2.0.8.tar.gz", hash = "sha256:b325c77659208bfb6af1b0d93b1a1aa4112e1bb29a4c5ced816758a722f0e388"}, +] + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, +] + +[[package]] +name = "cffi" +version = "1.15.1" +description = "Foreign Function Interface for Python calling C code." 
+category = "main" +optional = false +python-versions = "*" +files = [ + {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, + {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, + {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, + {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, + {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, + {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, + {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, + {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, + {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, + {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, + {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, + {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, + {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, + {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, + {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, + {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, + {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, + {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, + {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, + {file = 
"charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, + {file = 
"charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, + {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, +] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "cohere" +version = "3.10.0" +description = "A Python library for the Cohere API" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "cohere-3.10.0.tar.gz", hash = "sha256:8c06a87a47aa9521051eeba130ce391d84ab578148c4ea5b62f6dcc41bd3a274"}, +] + +[package.dependencies] +requests = "*" +urllib3 = ">=1.26,<2.0" + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "comm" +version = "0.1.3" +description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
+category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "comm-0.1.3-py3-none-any.whl", hash = "sha256:16613c6211e20223f215fc6d3b266a247b6e2641bf4e0a3ad34cb1aff2aa3f37"}, + {file = "comm-0.1.3.tar.gz", hash = "sha256:a61efa9daffcfbe66fd643ba966f846a624e4e6d6767eda9cf6e993aadaab93e"}, +] + +[package.dependencies] +traitlets = ">=5.3" + +[package.extras] +lint = ["black (>=22.6.0)", "mdformat (>0.7)", "mdformat-gfm (>=0.3.5)", "ruff (>=0.0.156)"] +test = ["pytest"] +typing = ["mypy (>=0.990)"] + +[[package]] +name = "confection" +version = "0.0.4" +description = "The sweetest config system for Python" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "confection-0.0.4-py3-none-any.whl", hash = "sha256:aeac5919ba770c7b281aa5863bb6b0efed42568a7ad8ea26b6cb632154503fb2"}, + {file = "confection-0.0.4.tar.gz", hash = "sha256:b1ddf5885da635f0e260a40b339730806dfb1bd17d30e08764f35af841b04ecf"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.11.0" +srsly = ">=2.4.0,<3.0.0" + +[[package]] +name = "coverage" +version = "7.2.2" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "coverage-7.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c90e73bdecb7b0d1cea65a08cb41e9d672ac6d7995603d6465ed4914b98b9ad7"}, + {file = "coverage-7.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2926b8abedf750c2ecf5035c07515770944acf02e1c46ab08f6348d24c5f94d"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57b77b9099f172804e695a40ebaa374f79e4fb8b92f3e167f66facbf92e8e7f5"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:efe1c0adad110bf0ad7fb59f833880e489a61e39d699d37249bdf42f80590169"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2199988e0bc8325d941b209f4fd1c6fa007024b1442c5576f1a32ca2e48941e6"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:81f63e0fb74effd5be736cfe07d710307cc0a3ccb8f4741f7f053c057615a137"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:186e0fc9cf497365036d51d4d2ab76113fb74f729bd25da0975daab2e107fd90"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:420f94a35e3e00a2b43ad5740f935358e24478354ce41c99407cddd283be00d2"}, + {file = "coverage-7.2.2-cp310-cp310-win32.whl", hash = "sha256:38004671848b5745bb05d4d621526fca30cee164db42a1f185615f39dc997292"}, + {file = "coverage-7.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:0ce383d5f56d0729d2dd40e53fe3afeb8f2237244b0975e1427bfb2cf0d32bab"}, + {file = "coverage-7.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3eb55b7b26389dd4f8ae911ba9bc8c027411163839dea4c8b8be54c4ee9ae10b"}, + {file = "coverage-7.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d2b96123a453a2d7f3995ddb9f28d01fd112319a7a4d5ca99796a7ff43f02af5"}, + {file = "coverage-7.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:299bc75cb2a41e6741b5e470b8c9fb78d931edbd0cd009c58e5c84de57c06731"}, + {file = "coverage-7.2.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e1df45c23d4230e3d56d04414f9057eba501f78db60d4eeecfcb940501b08fd"}, + {file = 
"coverage-7.2.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:006ed5582e9cbc8115d2e22d6d2144a0725db542f654d9d4fda86793832f873d"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d683d230b5774816e7d784d7ed8444f2a40e7a450e5720d58af593cb0b94a212"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8efb48fa743d1c1a65ee8787b5b552681610f06c40a40b7ef94a5b517d885c54"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c752d5264053a7cf2fe81c9e14f8a4fb261370a7bb344c2a011836a96fb3f57"}, + {file = "coverage-7.2.2-cp311-cp311-win32.whl", hash = "sha256:55272f33da9a5d7cccd3774aeca7a01e500a614eaea2a77091e9be000ecd401d"}, + {file = "coverage-7.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:92ebc1619650409da324d001b3a36f14f63644c7f0a588e331f3b0f67491f512"}, + {file = "coverage-7.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5afdad4cc4cc199fdf3e18088812edcf8f4c5a3c8e6cb69127513ad4cb7471a9"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0484d9dd1e6f481b24070c87561c8d7151bdd8b044c93ac99faafd01f695c78e"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d530191aa9c66ab4f190be8ac8cc7cfd8f4f3217da379606f3dd4e3d83feba69"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ac0f522c3b6109c4b764ffec71bf04ebc0523e926ca7cbe6c5ac88f84faced0"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ba279aae162b20444881fc3ed4e4f934c1cf8620f3dab3b531480cf602c76b7f"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:53d0fd4c17175aded9c633e319360d41a1f3c6e352ba94edcb0fa5167e2bad67"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c99cb7c26a3039a8a4ee3ca1efdde471e61b4837108847fb7d5be7789ed8fd9"}, + {file = "coverage-7.2.2-cp37-cp37m-win32.whl", hash = "sha256:5cc0783844c84af2522e3a99b9b761a979a3ef10fb87fc4048d1ee174e18a7d8"}, + {file = "coverage-7.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:817295f06eacdc8623dc4df7d8b49cea65925030d4e1e2a7c7218380c0072c25"}, + {file = "coverage-7.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6146910231ece63facfc5984234ad1b06a36cecc9fd0c028e59ac7c9b18c38c6"}, + {file = "coverage-7.2.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:387fb46cb8e53ba7304d80aadca5dca84a2fbf6fe3faf6951d8cf2d46485d1e5"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046936ab032a2810dcaafd39cc4ef6dd295df1a7cbead08fe996d4765fca9fe4"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e627dee428a176ffb13697a2c4318d3f60b2ccdde3acdc9b3f304206ec130ccd"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fa54fb483decc45f94011898727802309a109d89446a3c76387d016057d2c84"}, + {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3668291b50b69a0c1ef9f462c7df2c235da3c4073f49543b01e7eb1dee7dd540"}, + {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7c20b731211261dc9739bbe080c579a1835b0c2d9b274e5fcd903c3a7821cf88"}, + {file = 
"coverage-7.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5764e1f7471cb8f64b8cda0554f3d4c4085ae4b417bfeab236799863703e5de2"}, + {file = "coverage-7.2.2-cp38-cp38-win32.whl", hash = "sha256:4f01911c010122f49a3e9bdc730eccc66f9b72bd410a3a9d3cb8448bb50d65d3"}, + {file = "coverage-7.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:c448b5c9e3df5448a362208b8d4b9ed85305528313fca1b479f14f9fe0d873b8"}, + {file = "coverage-7.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bfe7085783cda55e53510482fa7b5efc761fad1abe4d653b32710eb548ebdd2d"}, + {file = "coverage-7.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9d22e94e6dc86de981b1b684b342bec5e331401599ce652900ec59db52940005"}, + {file = "coverage-7.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507e4720791977934bba016101579b8c500fb21c5fa3cd4cf256477331ddd988"}, + {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc4803779f0e4b06a2361f666e76f5c2e3715e8e379889d02251ec911befd149"}, + {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db8c2c5ace167fd25ab5dd732714c51d4633f58bac21fb0ff63b0349f62755a8"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f68ee32d7c4164f1e2c8797535a6d0a3733355f5861e0f667e37df2d4b07140"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d52f0a114b6a58305b11a5cdecd42b2e7f1ec77eb20e2b33969d702feafdd016"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:797aad79e7b6182cb49c08cc5d2f7aa7b2128133b0926060d0a8889ac43843be"}, + {file = "coverage-7.2.2-cp39-cp39-win32.whl", hash = "sha256:db45eec1dfccdadb179b0f9ca616872c6f700d23945ecc8f21bb105d74b1c5fc"}, + {file = "coverage-7.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:8dbe2647bf58d2c5a6c5bcc685f23b5f371909a5624e9f5cd51436d6a9f6c6ef"}, + {file = "coverage-7.2.2-pp37.pp38.pp39-none-any.whl", hash = "sha256:872d6ce1f5be73f05bea4df498c140b9e7ee5418bfa2cc8204e7f9b817caa968"}, + {file = "coverage-7.2.2.tar.gz", hash = "sha256:36dd42da34fe94ed98c39887b86db9d06777b1c8f860520e21126a75507024f2"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "cryptography" +version = "40.0.1" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:918cb89086c7d98b1b86b9fdb70c712e5a9325ba6f7d7cfb509e784e0cfc6917"}, + {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9618a87212cb5200500e304e43691111570e1f10ec3f35569fdfcd17e28fd797"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4805a4ca729d65570a1b7cac84eac1e431085d40387b7d3bbaa47e39890b88"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63dac2d25c47f12a7b8aa60e528bfb3c51c5a6c5a9f7c86987909c6c79765554"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a4e3406cfed6b1f6d6e87ed243363652b2586b2d917b0609ca4f97072994405"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1e0af458515d5e4028aad75f3bb3fe7a31e46ad920648cd59b64d3da842e4356"}, + {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8aa3609d337ad85e4eb9bb0f8bcf6e4409bfb86e706efa9a027912169e89122"}, + {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cf91e428c51ef692b82ce786583e214f58392399cf65c341bc7301d096fa3ba2"}, + {file = "cryptography-40.0.1-cp36-abi3-win32.whl", hash = "sha256:650883cc064297ef3676b1db1b7b1df6081794c4ada96fa457253c4cc40f97db"}, + {file = "cryptography-40.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:a805a7bce4a77d51696410005b3e85ae2839bad9aa38894afc0aa99d8e0c3160"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd033d74067d8928ef00a6b1327c8ea0452523967ca4463666eeba65ca350d4c"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d36bbeb99704aabefdca5aee4eba04455d7a27ceabd16f3b3ba9bdcc31da86c4"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:32057d3d0ab7d4453778367ca43e99ddb711770477c4f072a51b3ca69602780a"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f5d7b79fa56bc29580faafc2ff736ce05ba31feaa9d4735048b0de7d9ceb2b94"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7c872413353c70e0263a9368c4993710070e70ab3e5318d85510cc91cce77e7c"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:28d63d75bf7ae4045b10de5413fb1d6338616e79015999ad9cf6fc538f772d41"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6f2bbd72f717ce33100e6467572abaedc61f1acb87b8d546001328d7f466b778"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cc3a621076d824d75ab1e1e530e66e7e8564e357dd723f2533225d40fe35c60c"}, + {file = "cryptography-40.0.1.tar.gz", hash = "sha256:2803f2f8b1e95f614419926c7e6f55d828afc614ca5ed61543877ae668cc3472"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +pep8test = ["black", "check-manifest", "mypy", "ruff"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] +tox = ["tox"] + +[[package]] +name = "cymem" 
+version = "2.0.7" +description = "Manage calls to calloc/free through Cython" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "cymem-2.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4981fc9182cc1fe54bfedf5f73bfec3ce0c27582d9be71e130c46e35958beef0"}, + {file = "cymem-2.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:42aedfd2e77aa0518a24a2a60a2147308903abc8b13c84504af58539c39e52a3"}, + {file = "cymem-2.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c183257dc5ab237b664f64156c743e788f562417c74ea58c5a3939fe2d48d6f6"}, + {file = "cymem-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d18250f97eeb13af2e8b19d3cefe4bf743b963d93320b0a2e729771410fd8cf4"}, + {file = "cymem-2.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:864701e626b65eb2256060564ed8eb034ebb0a8f14ce3fbef337e88352cdee9f"}, + {file = "cymem-2.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:314273be1f143da674388e0a125d409e2721fbf669c380ae27c5cbae4011e26d"}, + {file = "cymem-2.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:df543a36e7000808fe0a03d92fd6cd8bf23fa8737c3f7ae791a5386de797bf79"}, + {file = "cymem-2.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e5e1b7de7952d89508d07601b9e95b2244e70d7ef60fbc161b3ad68f22815f8"}, + {file = "cymem-2.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2aa33f1dbd7ceda37970e174c38fd1cf106817a261aa58521ba9918156868231"}, + {file = "cymem-2.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:10178e402bb512b2686b8c2f41f930111e597237ca8f85cb583ea93822ef798d"}, + {file = "cymem-2.0.7-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2971b7da5aa2e65d8fbbe9f2acfc19ff8e73f1896e3d6e1223cc9bf275a0207"}, + {file = "cymem-2.0.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85359ab7b490e6c897c04863704481600bd45188a0e2ca7375eb5db193e13cb7"}, + {file = "cymem-2.0.7-cp36-cp36m-win_amd64.whl", hash = "sha256:0ac45088abffbae9b7db2c597f098de51b7e3c1023cb314e55c0f7f08440cf66"}, + {file = "cymem-2.0.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:26e5d5c6958855d2fe3d5629afe85a6aae5531abaa76f4bc21b9abf9caaccdfe"}, + {file = "cymem-2.0.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:011039e12d3144ac1bf3a6b38f5722b817f0d6487c8184e88c891b360b69f533"}, + {file = "cymem-2.0.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f9e63e5ad4ed6ffa21fd8db1c03b05be3fea2f32e32fdace67a840ea2702c3d"}, + {file = "cymem-2.0.7-cp37-cp37m-win_amd64.whl", hash = "sha256:5ea6b027fdad0c3e9a4f1b94d28d213be08c466a60c72c633eb9db76cf30e53a"}, + {file = "cymem-2.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4302df5793a320c4f4a263c7785d2fa7f29928d72cb83ebeb34d64a610f8d819"}, + {file = "cymem-2.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:24b779046484674c054af1e779c68cb224dc9694200ac13b22129d7fb7e99e6d"}, + {file = "cymem-2.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c50794c612801ed8b599cd4af1ed810a0d39011711c8224f93e1153c00e08d1"}, + {file = "cymem-2.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9525ad563b36dc1e30889d0087a0daa67dd7bb7d3e1530c4b61cd65cc756a5b"}, + {file = "cymem-2.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:48b98da6b906fe976865263e27734ebc64f972a978a999d447ad6c83334e3f90"}, + {file = "cymem-2.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e156788d32ad8f7141330913c5d5d2aa67182fca8f15ae22645e9f379abe8a4c"}, + {file = "cymem-2.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3da89464021fe669932fce1578343fcaf701e47e3206f50d320f4f21e6683ca5"}, + {file = "cymem-2.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f359cab9f16e25b3098f816c40acbf1697a3b614a8d02c56e6ebcb9c89a06b3"}, + {file = "cymem-2.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f165d7bce55d6730930e29d8294569788aa127f1be8d1642d9550ed96223cb37"}, + {file = "cymem-2.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:59a09cf0e71b1b88bfa0de544b801585d81d06ea123c1725e7c5da05b7ca0d20"}, + {file = "cymem-2.0.7.tar.gz", hash = "sha256:e6034badb5dd4e10344211c81f16505a55553a7164adc314c75bd80cf07e57a8"}, +] + +[[package]] +name = "dataclasses-json" +version = "0.5.7" +description = "Easily serialize dataclasses to and from JSON" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "dataclasses-json-0.5.7.tar.gz", hash = "sha256:c2c11bc8214fbf709ffc369d11446ff6945254a7f09128154a7620613d8fda90"}, + {file = "dataclasses_json-0.5.7-py3-none-any.whl", hash = "sha256:bc285b5f892094c3a53d558858a88553dd6a61a11ab1a8128a0e554385dcc5dd"}, +] + +[package.dependencies] +marshmallow = ">=3.3.0,<4.0.0" +marshmallow-enum = ">=1.5.1,<2.0.0" +typing-inspect = ">=0.4.0" + +[package.extras] +dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=6.2.3)", "simplejson", "types-dataclasses"] + + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "deeplake" +version = "3.2.18" +description = "Activeloop Deep Lake" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "deeplake-3.2.18.tar.gz", hash = "sha256:cb381fc771b08b32415efbb88c4adb57fc54ffa01f19d86b76dd6f839108799a"}, +] + +[package.dependencies] +aioboto3 = {version = "10.4.0", markers = "python_version >= \"3.7\" and sys_platform != \"win32\""} +boto3 = "*" +click = "*" +humbug = ">=0.2.6" +nest_asyncio = {version = "*", markers = "python_version >= \"3.7\" and sys_platform != \"win32\""} +numcodecs = "*" +numpy = "*" +pathos = "*" +pillow = "*" +pyjwt = "*" +tqdm = "*" + +[package.extras] +all = ["IPython", "av (>=8.1.0)", "flask", "google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)", "laspy", "libdeeplake (==0.0.41)", "nibabel", "oauth2client (>=4.1.3,<4.2.0)", "pydicom"] +audio = ["av (>=8.1.0)"] +av = ["av (>=8.1.0)"] +dicom = ["nibabel", "pydicom"] +enterprise = ["libdeeplake (==0.0.41)", "pyjwt"] +gcp = ["google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)"] +gdrive = ["google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "oauth2client (>=4.1.3,<4.2.0)"] +medical = ["nibabel", "pydicom"] +point-cloud = ["laspy"] +video = ["av (>=8.1.0)"] +visualizer = ["IPython", "flask"] + +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python 
stdlib modules" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + +[[package]] +name = "dill" +version = "0.3.6" +description = "serialize all of python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, + {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "dnspython" +version = "2.3.0" +description = "DNS toolkit" +category = "main" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dnspython-2.3.0-py3-none-any.whl", hash = "sha256:89141536394f909066cabd112e3e1a37e4e654db00a25308b0f130bc3152eb46"}, + {file = "dnspython-2.3.0.tar.gz", hash = "sha256:224e32b03eb46be70e12ef6d64e0be123a64e621ab4c0822ff6d450d52a540b9"}, +] + +[package.extras] +curio = ["curio (>=1.2,<2.0)", "sniffio (>=1.1,<2.0)"] +dnssec = ["cryptography (>=2.6,<40.0)"] +doh = ["h2 (>=4.1.0)", "httpx (>=0.21.1)", "requests (>=2.23.0,<3.0.0)", "requests-toolbelt (>=0.9.1,<0.11.0)"] +doq = ["aioquic (>=0.9.20)"] +idna = ["idna (>=2.1,<4.0)"] +trio = ["trio (>=0.14,<0.23)"] +wmi = ["wmi (>=1.5.1,<2.0.0)"] + +[[package]] +name = "docutils" +version = "0.17.1" +description = "Docutils -- Python Documentation Utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "docutils-0.17.1-py2.py3-none-any.whl", hash = "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"}, + {file = "docutils-0.17.1.tar.gz", hash = "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125"}, +] + +[[package]] +name = "duckdb" +version = "0.7.1" +description = "DuckDB embedded database" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "duckdb-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3e0170be6cc315c179169dfa3e06485ef7009ef8ce399cd2908f29105ef2c67b"}, + {file = "duckdb-0.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6360d41023e726646507d5479ba60960989a09f04527b36abeef3643c61d8c48"}, + {file = "duckdb-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:578c269d7aa27184e8d45421694f89deda3f41fe6bd2a8ce48b262b9fc975326"}, + {file = "duckdb-0.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36aae9a923c9f78da1cf3fcf75873f62d32ea017d4cef7c706d16d3eca527ca2"}, + {file = "duckdb-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:630e0122a02f19bb1fafae00786350b2c31ae8422fce97c827bd3686e7c386af"}, + {file = "duckdb-0.7.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b9ca2d294725e523ce207bc37f28787478ae6f7a223e2cf3a213a2d498596c3"}, + {file = "duckdb-0.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0bd89f388205b6c99b62650169efe9a02933555ee1d46ddf79fbd0fb9e62652b"}, + {file = "duckdb-0.7.1-cp310-cp310-win32.whl", hash = "sha256:a9e987565a268fd8da9f65e54621d28f39c13105b8aee34c96643074babe6d9c"}, + {file = "duckdb-0.7.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:5d986b5ad1307b069309f9707c0c5051323e29865aefa059eb6c3b22dc9751b6"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:54606dfd24d7181d3098030ca6858f6be52f3ccbf42fff05f7587f2d9cdf4343"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd9367ae650b6605ffe00412183cf0edb688a5fc9fbb03ed757e8310e7ec3b6c"}, + {file = "duckdb-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aaf33aeb543c7816bd915cd10141866d54f92f698e1b5712de9d8b7076da19df"}, + {file = "duckdb-0.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e56b0329c38c0356b40449917bab6fce6ac27d356257b9a9da613d2a0f064e0"}, + {file = "duckdb-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:604b8b476d6cc6bf91625d8c2722ef9c50c402b3d64bc518c838d6c279e6d93b"}, + {file = "duckdb-0.7.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:32a268508c6d7fdc99d5442736051de74c28a5166c4cc3dcbbf35d383299b941"}, + {file = "duckdb-0.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90794406fa2111414877ee9db154fef940911f3920c312c1cf69947621737c8d"}, + {file = "duckdb-0.7.1-cp311-cp311-win32.whl", hash = "sha256:bf20c5ee62cbbf10b39ebdfd70d454ce914e70545c7cb6cb78cb5befef96328a"}, + {file = "duckdb-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:bb2700785cab37cd1e7a76c4547a5ab0f8a7c28ad3f3e4d02a8fae52be223090"}, + {file = "duckdb-0.7.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b09741cfa31388b8f9cdf5c5200e0995d55a5b54d2d1a75b54784e2f5c042f7f"}, + {file = "duckdb-0.7.1-cp36-cp36m-win32.whl", hash = "sha256:766e6390f7ace7f1e322085c2ca5d0ad94767bde78a38d168253d2b0b4d5cd5c"}, + {file = "duckdb-0.7.1-cp36-cp36m-win_amd64.whl", hash = "sha256:6a3f3315e2b553db3463f07324f62dfebaf3b97656a87558e59e2f1f816eaf15"}, + {file = "duckdb-0.7.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:278edb8c912d836b3b77fd1695887e1dbd736137c3912478af3608c9d7307bb0"}, + {file = "duckdb-0.7.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e189b558d10b58fe6ed85ce79f728e143eb4115db1e63147a44db613cd4dd0d9"}, + {file = "duckdb-0.7.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b91ec3544ee4dc9e6abbdf2669475d5adedaaea51987c67acf161673e6b7443"}, + {file = "duckdb-0.7.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3fe3f3dbd62b76a773144eef31aa29794578c359da932e77fef04516535318ca"}, + {file = "duckdb-0.7.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1e78c7f59325e99f0b3d9fe7c2bad4aaadf42d2c7711925cc26331d7647a91b2"}, + {file = "duckdb-0.7.1-cp37-cp37m-win32.whl", hash = "sha256:bc2a12d9f4fc8ef2fd1022d610287c9fc9972ea06b7510fc87387f1fa256a390"}, + {file = "duckdb-0.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:53e3db1bc0f445ee48b23cde47bfba08c7fa5a69976c740ec8cdf89543d2405d"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1247cc11bac17f2585d11681329806c86295e32242f84a10a604665e697d5c81"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5feaff16a012075b49dfa09d4cb24455938d6b0e06b08e1404ec00089119dba2"}, + {file = "duckdb-0.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b411a0c361eab9b26dcd0d0c7a0d1bc0ad6b214068555de7e946fbdd2619961a"}, + {file = "duckdb-0.7.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7c76d8694ecdb579241ecfeaf03c51d640b984dbbe8e1d9f919089ebf3cdea6"}, + {file = "duckdb-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:193b896eed44d8751a755ccf002a137630020af0bc3505affa21bf19fdc90df3"}, + {file = "duckdb-0.7.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7da132ee452c80a3784b8daffd86429fa698e1b0e3ecb84660db96d36c27ad55"}, + {file = "duckdb-0.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5fd08c97c3e8cb5bec3822cf78b966b489213dcaab24b25c05a99f7caf8db467"}, + {file = "duckdb-0.7.1-cp38-cp38-win32.whl", hash = "sha256:9cb956f94fa55c4782352dac7cc7572a58312bd7ce97332bb14591d6059f0ea4"}, + {file = "duckdb-0.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:289a5f65213e66d320ebcd51a94787e7097b9d1c3492d01a121a2c809812bf19"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8085ad58c9b5854ee3820804fa1797e6b3134429c1506c3faab3cb96e71b07e9"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b47c19d1f2f662a5951fc6c5f6939d0d3b96689604b529cdcffd9afdcc95bff2"}, + {file = "duckdb-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6a611f598226fd634b7190f509cc6dd668132ffe436b0a6b43847b4b32b99e4a"}, + {file = "duckdb-0.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6730f03b5b78f3943b752c90bdf37b62ae3ac52302282a942cc675825b4a8dc9"}, + {file = "duckdb-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe23e938d29cd8ea6953d77dc828b7f5b95a4dbc7cd7fe5bcc3531da8cec3dba"}, + {file = "duckdb-0.7.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:feffe503c2e2a99480e1e5e15176f37796b3675e4dadad446fe7c2cc672aed3c"}, + {file = "duckdb-0.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:72fceb06f5bf24ad6bb5974c60d397a7a7e61b3d847507a22276de076f3392e2"}, + {file = "duckdb-0.7.1-cp39-cp39-win32.whl", hash = "sha256:c4d5217437d20d05fe23317bbc161befa1f9363f3622887cd1d2f4719b407936"}, + {file = "duckdb-0.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:066885e1883464ce3b7d1fd844f9431227dcffe1ee39bfd2a05cd6d53f304557"}, + {file = "duckdb-0.7.1.tar.gz", hash = "sha256:a7db6da0366b239ea1e4541fcc19556b286872f5015c9a54c2e347146e25a2ad"}, +] + +[[package]] +name = "duckdb-engine" +version = "0.7.0" +description = "SQLAlchemy driver for duckdb" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "duckdb_engine-0.7.0-py3-none-any.whl", hash = "sha256:272f8cb27cf7599372f6b2628c147c41cd656a316272d8ababdcc81447a5455c"}, + {file = "duckdb_engine-0.7.0.tar.gz", hash = "sha256:3c17b2dba582fe7d74731d6cb52d73eaba7555a31ca602f7837dfc40f9db90c4"}, +] + +[package.dependencies] +duckdb = ">=0.4.0" +numpy = "*" +sqlalchemy = ">=1.3.19" + +[[package]] +name = "elastic-transport" +version = "8.4.0" +description = "Transport classes and utilities shared among Python Elastic client libraries" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "elastic-transport-8.4.0.tar.gz", hash = "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10"}, + {file = "elastic_transport-8.4.0-py3-none-any.whl", hash = "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815"}, +] + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.26.2,<2" + +[package.extras] +develop = ["aiohttp", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "trustme"] + +[[package]] +name = "elasticsearch" +version = "8.6.2" +description = "Python client for Elasticsearch" +category = "main" +optional = true +python-versions = ">=3.6, <4" +files = [ + {file = "elasticsearch-8.6.2-py3-none-any.whl", hash = 
"sha256:8ccbebd9a0f6f523c7db67bb54863dde8bdb93daae4ff97f7c814e0500a73e84"}, + {file = "elasticsearch-8.6.2.tar.gz", hash = "sha256:084458e84caa91e3ad807b68aa82c022e785bead853a3b125641a25e894a1d47"}, +] + +[package.dependencies] +elastic-transport = ">=8,<9" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +requests = ["requests (>=2.4.0,<3.0.0)"] + +[[package]] +name = "entrypoints" +version = "0.4" +description = "Discover and load entry points from installed packages." +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"}, + {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "executing" +version = "1.2.0" +description = "Get the currently executing AST node of a frame, and other information" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "executing-1.2.0-py2.py3-none-any.whl", hash = "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc"}, + {file = "executing-1.2.0.tar.gz", hash = "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107"}, +] + +[package.extras] +tests = ["asttokens", "littleutils", "pytest", "rich"] + +[[package]] +name = "faiss-cpu" +version = "1.7.3" +description = "A library for efficient similarity search and clustering of dense vectors." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "faiss-cpu-1.7.3.tar.gz", hash = "sha256:cb71fe3f2934732d157d9d8cfb6ed2dd4020a0065571c84842ff6a3f0beab310"}, + {file = "faiss_cpu-1.7.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:343f025e0846239d987d0c719772387ad685b74e5ef62b2e5616cabef9062729"}, + {file = "faiss_cpu-1.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8b7b1cf693d7c24b5a633ff024717bd715fec501af4854357da0805b4899bcec"}, + {file = "faiss_cpu-1.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c37e5fc0a266839844798a53dd42dd6afbee0c5905611f3f278297053fccbd7"}, + {file = "faiss_cpu-1.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0628f7b0c6263ef4431995bb4f5f39833f999e96e6663935cbf0a1f2243dc4ac"}, + {file = "faiss_cpu-1.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:e22d1887c617156a673665c913ee82a30bfc1a3bc939ba8500b61328bce5a625"}, + {file = "faiss_cpu-1.7.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6d411449a5f3c3abfcafadaac3190ab1ab206023fc9110da86649506dcbe8a27"}, + {file = "faiss_cpu-1.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a10ea8622908f9f9ca4003e66da809dfad4af5c7d9fb7f582722d703bbc6c8bd"}, + {file = "faiss_cpu-1.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c5ced43ae058a62f63b12194ec9aa4c34066b0ea813ecbd936c65b7d52848c8"}, + {file = "faiss_cpu-1.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3df6371012248dea8e9509949e2d2c6d73dea7c1bdaa4ba4563eb1c3cd8021a6"}, + {file = "faiss_cpu-1.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:8b6ff7854c3f46104718c6b34e81cd48c156d970dd87703c5122ca90217bb8dc"}, + {file = "faiss_cpu-1.7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ab6314a8fbcce11dc3ecb6f48dda8c4ec274ed11c1f336f599f480bf0561442c"}, + {file = "faiss_cpu-1.7.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:877c0bbf4c4a1806d88e091aba4c91ff3fa35c3ede5663b7fafc5b39247a369e"}, + {file = "faiss_cpu-1.7.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f199be10d30ecc6ed65350931006eca01b7bb8faa27d63069318eea0f6a0c1"}, + {file = "faiss_cpu-1.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:1ca2b7cdbfdcc6a2e8fa75a09594916b50ec8260913ca48334dc3ce797179b5f"}, + {file = "faiss_cpu-1.7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7b3f91856c19cfb8464178bab7e8ea94a391f6947b556be6754f9fc10b3c25fb"}, + {file = "faiss_cpu-1.7.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a238a0ef4d36c614d6f60e1ea308288b3920091638a3687f708de6071d007c1"}, + {file = "faiss_cpu-1.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af53bee502c629eaaaf8b5ec648484a726be0fd2768ad4ef2bd4b829384b2682"}, + {file = "faiss_cpu-1.7.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:441d1c305595d925138f2cde63dabe8c10ee05fc8ad66bf750e278a7e8c409bd"}, + {file = "faiss_cpu-1.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:2766cc14b9004c1aae3b3943e693c3a9566eb1a25168b681981f9048276fe1e7"}, + {file = "faiss_cpu-1.7.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:20ef191bb6164c8e794b11d20427568a75d15980b6d66732071e9aa57ea06e2d"}, + {file = "faiss_cpu-1.7.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c57c293c4682066955626c2a2956be9a3b92594f69ed1a33abd72260a6911b69"}, + {file = "faiss_cpu-1.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:bd128170446ff3c3e28d89e813d32cd04f17fa3025794778a01a0d81524275dc"}, + {file = "faiss_cpu-1.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a14d832b5361ce9af21977eb1dcdebe23b9edcc12aad40316df7ca1bd86bc6b5"}, + {file = "faiss_cpu-1.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:52df8895c5e59d1c9eda368a63790381a6f7fceddb22bed08f9c90a706d8a148"}, +] + +[[package]] +name = "fastjsonschema" +version = "2.16.3" +description = "Fastest Python implementation of JSON schema" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "fastjsonschema-2.16.3-py3-none-any.whl", hash = "sha256:04fbecc94300436f628517b05741b7ea009506ce8f946d40996567c669318490"}, + {file = "fastjsonschema-2.16.3.tar.gz", hash = "sha256:4a30d6315a68c253cfa8f963b9697246315aa3db89f98b97235e345dedfb0b8e"}, +] + +[package.extras] +devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] + +[[package]] +name = "filelock" +version = "3.10.6" +description = "A platform independent file lock." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "filelock-3.10.6-py3-none-any.whl", hash = "sha256:52f119747b2b9c4730dac715a7b1ab34b8ee70fd9259cba158ee53da566387ff"}, + {file = "filelock-3.10.6.tar.gz", hash = "sha256:409105becd604d6b176a483f855e7e8903c5cb2873e47f2c64f66a370c046aaf"}, +] + +[package.extras] +docs = ["furo (>=2022.12.7)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.2)", "diff-cover (>=7.5)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "flatbuffers" +version = "23.3.3" +description = "The FlatBuffers serialization format for Python" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "flatbuffers-23.3.3-py2.py3-none-any.whl", hash = "sha256:5ad36d376240090757e8f0a2cfaf6abcc81c6536c0dc988060375fd0899121f8"}, + {file = "flatbuffers-23.3.3.tar.gz", hash = "sha256:cabd87c4882f37840f6081f094b2c5bc28cefc2a6357732746936d055ab45c3d"}, +] + +[[package]] +name = "fqdn" +version = "1.5.1" +description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" +files = [ + {file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"}, + {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, +] + +[[package]] +name = "freezegun" +version = "1.2.2" +description = "Let your Python tests travel through time" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "freezegun-1.2.2-py3-none-any.whl", hash = "sha256:ea1b963b993cb9ea195adbd893a48d573fda951b0da64f60883d7e988b606c9f"}, + {file = "freezegun-1.2.2.tar.gz", hash = "sha256:cd22d1ba06941384410cd967d8a99d5ae2442f57dfafeff2fda5de8dc5c05446"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "frozenlist" +version = "1.3.3" +description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, + {file = 
"frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, + {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, + {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, + {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, + {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, + {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, + {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, + {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, + {file = 
"frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, + {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, + {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, + {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, +] + +[[package]] +name = "gast" +version = "0.4.0" +description = "Python AST that abstracts the underlying Python version" +category = "main" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "gast-0.4.0-py3-none-any.whl", hash = "sha256:b7adcdd5adbebf1adf17378da5ba3f543684dbec47b1cda1f3997e573cd542c4"}, + {file = "gast-0.4.0.tar.gz", hash = "sha256:40feb7b8b8434785585ab224d1568b857edb18297e5a3047f1ba012bc83b42c1"}, +] + +[[package]] +name = "google-api-core" +version = "2.11.0" +description = "Google API client core library" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "google-api-core-2.11.0.tar.gz", hash = "sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22"}, + {file = "google_api_core-2.11.0-py3-none-any.whl", hash = "sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e"}, +] + +[package.dependencies] +google-auth = ">=2.14.1,<3.0dev" +googleapis-common-protos = ">=1.56.2,<2.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)", "grpcio-status (>=1.49.1,<2.0dev)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] + +[[package]] +name = "google-api-python-client" +version = "2.70.0" +description = "Google API Client Library for Python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "google-api-python-client-2.70.0.tar.gz", hash = "sha256:262de094d5a30d337f59e66581019fed45b698c078397ac48dd323c0968236e7"}, + {file = "google_api_python_client-2.70.0-py2.py3-none-any.whl", hash = "sha256:67da78956f2bf4b763305cd791aeab250878c1f88f1422aaba4682a608b8e5a4"}, +] + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.19.0,<3.0.0dev" +google-auth-httplib2 = ">=0.1.0" +httplib2 = ">=0.15.0,<1dev" +uritemplate = ">=3.0.1,<5" + +[[package]] +name = "google-auth" +version = "2.16.3" +description = "Google Authentication Library" +category = "main" +optional = true +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" +files = [ + {file = "google-auth-2.16.3.tar.gz", hash = "sha256:611779ce33a3aee265b94b74d4bb8c188f33010f5814761250a0ebbde94cc745"}, + {file = 
"google_auth-2.16.3-py2.py3-none-any.whl", hash = "sha256:4dfcfd8ecd1cf03ddc97fddfb3b1f2973ea4f3f664aa0d8cfaf582ef9f0c60e7"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} +six = ">=1.9.0" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests (>=2.20.0,<3.0.0dev)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0dev)"] + +[[package]] +name = "google-auth-httplib2" +version = "0.1.0" +description = "Google Authentication Library: httplib2 transport" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, + {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, +] + +[package.dependencies] +google-auth = "*" +httplib2 = ">=0.15.0" +six = "*" + +[[package]] +name = "google-auth-oauthlib" +version = "0.4.6" +description = "Google Authentication Library" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "google-auth-oauthlib-0.4.6.tar.gz", hash = "sha256:a90a072f6993f2c327067bf65270046384cda5a8ecb20b94ea9a687f1f233a7a"}, + {file = "google_auth_oauthlib-0.4.6-py2.py3-none-any.whl", hash = "sha256:3f2a6e802eebbb6fb736a370fbf3b055edcb6b52878bf2f26330b5e041316c73"}, +] + +[package.dependencies] +google-auth = ">=1.0.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]] +name = "google-pasta" +version = "0.2.0" +description = "pasta is an AST-based Python refactoring library" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "google-pasta-0.2.0.tar.gz", hash = "sha256:c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e"}, + {file = "google_pasta-0.2.0-py2-none-any.whl", hash = "sha256:4612951da876b1a10fe3960d7226f0c7682cf901e16ac06e473b267a5afa8954"}, + {file = "google_pasta-0.2.0-py3-none-any.whl", hash = "sha256:b32482794a366b5366a32c92a9a9201b107821889935a02b3e51f6b432ea84ed"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "google-search-results" +version = "2.4.2" +description = "Scrape and search localized results from Google, Bing, Baidu, Yahoo, Yandex, Ebay, Homedepot, youtube at scale using SerpApi.com" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "google_search_results-2.4.2.tar.gz", hash = "sha256:603a30ecae2af8e600b22635757a6df275dad4b934f975e67878ccd640b78245"}, +] + +[package.dependencies] +requests = "*" + +[[package]] +name = "googleapis-common-protos" +version = "1.59.0" +description = "Common protobufs used in Google APIs" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "googleapis-common-protos-1.59.0.tar.gz", hash = "sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44"}, + {file = "googleapis_common_protos-1.59.0-py2.py3-none-any.whl", hash = "sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f"}, +] + +[package.dependencies] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" + +[package.extras] +grpc = ["grpcio 
(>=1.44.0,<2.0.0dev)"] + +[[package]] +name = "greenlet" +version = "2.0.1" +description = "Lightweight in-process concurrent programming" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +files = [ + {file = "greenlet-2.0.1-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:9ed358312e63bf683b9ef22c8e442ef6c5c02973f0c2a939ec1d7b50c974015c"}, + {file = "greenlet-2.0.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4f09b0010e55bec3239278f642a8a506b91034f03a4fb28289a7d448a67f1515"}, + {file = "greenlet-2.0.1-cp27-cp27m-win32.whl", hash = "sha256:1407fe45246632d0ffb7a3f4a520ba4e6051fc2cbd61ba1f806900c27f47706a"}, + {file = "greenlet-2.0.1-cp27-cp27m-win_amd64.whl", hash = "sha256:3001d00eba6bbf084ae60ec7f4bb8ed375748f53aeaefaf2a37d9f0370558524"}, + {file = "greenlet-2.0.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d566b82e92ff2e09dd6342df7e0eb4ff6275a3f08db284888dcd98134dbd4243"}, + {file = "greenlet-2.0.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0722c9be0797f544a3ed212569ca3fe3d9d1a1b13942d10dd6f0e8601e484d26"}, + {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d37990425b4687ade27810e3b1a1c37825d242ebc275066cfee8cb6b8829ccd"}, + {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be35822f35f99dcc48152c9839d0171a06186f2d71ef76dc57fa556cc9bf6b45"}, + {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c140e7eb5ce47249668056edf3b7e9900c6a2e22fb0eaf0513f18a1b2c14e1da"}, + {file = "greenlet-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d21681f09e297a5adaa73060737e3aa1279a13ecdcfcc6ef66c292cb25125b2d"}, + {file = "greenlet-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fb412b7db83fe56847df9c47b6fe3f13911b06339c2aa02dcc09dce8bbf582cd"}, + {file = "greenlet-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6a08799e9e88052221adca55741bf106ec7ea0710bca635c208b751f0d5b617"}, + {file = "greenlet-2.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e112e03d37987d7b90c1e98ba5e1b59e1645226d78d73282f45b326f7bddcb9"}, + {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56961cfca7da2fdd178f95ca407fa330c64f33289e1804b592a77d5593d9bd94"}, + {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13ba6e8e326e2116c954074c994da14954982ba2795aebb881c07ac5d093a58a"}, + {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bf633a50cc93ed17e494015897361010fc08700d92676c87931d3ea464123ce"}, + {file = "greenlet-2.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9f2c221eecb7ead00b8e3ddb913c67f75cba078fd1d326053225a3f59d850d72"}, + {file = "greenlet-2.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:13ebf93c343dd8bd010cd98e617cb4c1c1f352a0cf2524c82d3814154116aa82"}, + {file = "greenlet-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:6f61d71bbc9b4a3de768371b210d906726535d6ca43506737682caa754b956cd"}, + {file = "greenlet-2.0.1-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:2d0bac0385d2b43a7bd1d651621a4e0f1380abc63d6fb1012213a401cbd5bf8f"}, + {file = "greenlet-2.0.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:f6327b6907b4cb72f650a5b7b1be23a2aab395017aa6f1adb13069d66360eb3f"}, + {file = "greenlet-2.0.1-cp35-cp35m-win32.whl", hash = 
"sha256:81b0ea3715bf6a848d6f7149d25bf018fd24554a4be01fcbbe3fdc78e890b955"}, + {file = "greenlet-2.0.1-cp35-cp35m-win_amd64.whl", hash = "sha256:38255a3f1e8942573b067510f9611fc9e38196077b0c8eb7a8c795e105f9ce77"}, + {file = "greenlet-2.0.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:04957dc96669be041e0c260964cfef4c77287f07c40452e61abe19d647505581"}, + {file = "greenlet-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:4aeaebcd91d9fee9aa768c1b39cb12214b30bf36d2b7370505a9f2165fedd8d9"}, + {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:974a39bdb8c90a85982cdb78a103a32e0b1be986d411303064b28a80611f6e51"}, + {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dca09dedf1bd8684767bc736cc20c97c29bc0c04c413e3276e0962cd7aeb148"}, + {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c0757db9bd08470ff8277791795e70d0bf035a011a528ee9a5ce9454b6cba2"}, + {file = "greenlet-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5067920de254f1a2dee8d3d9d7e4e03718e8fd2d2d9db962c8c9fa781ae82a39"}, + {file = "greenlet-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:5a8e05057fab2a365c81abc696cb753da7549d20266e8511eb6c9d9f72fe3e92"}, + {file = "greenlet-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:3d75b8d013086b08e801fbbb896f7d5c9e6ccd44f13a9241d2bf7c0df9eda928"}, + {file = "greenlet-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:097e3dae69321e9100202fc62977f687454cd0ea147d0fd5a766e57450c569fd"}, + {file = "greenlet-2.0.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:cb242fc2cda5a307a7698c93173d3627a2a90d00507bccf5bc228851e8304963"}, + {file = "greenlet-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:72b00a8e7c25dcea5946692a2485b1a0c0661ed93ecfedfa9b6687bd89a24ef5"}, + {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5b0ff9878333823226d270417f24f4d06f235cb3e54d1103b71ea537a6a86ce"}, + {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be9e0fb2ada7e5124f5282d6381903183ecc73ea019568d6d63d33f25b2a9000"}, + {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b493db84d124805865adc587532ebad30efa68f79ad68f11b336e0a51ec86c2"}, + {file = "greenlet-2.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0459d94f73265744fee4c2d5ec44c6f34aa8a31017e6e9de770f7bcf29710be9"}, + {file = "greenlet-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a20d33124935d27b80e6fdacbd34205732660e0a1d35d8b10b3328179a2b51a1"}, + {file = "greenlet-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:ea688d11707d30e212e0110a1aac7f7f3f542a259235d396f88be68b649e47d1"}, + {file = "greenlet-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:afe07421c969e259e9403c3bb658968702bc3b78ec0b6fde3ae1e73440529c23"}, + {file = "greenlet-2.0.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:cd4ccc364cf75d1422e66e247e52a93da6a9b73cefa8cad696f3cbbb75af179d"}, + {file = "greenlet-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:4c8b1c43e75c42a6cafcc71defa9e01ead39ae80bd733a2608b297412beede68"}, + {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:659f167f419a4609bc0516fb18ea69ed39dbb25594934bd2dd4d0401660e8a1e"}, + {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:356e4519d4dfa766d50ecc498544b44c0249b6de66426041d7f8b751de4d6b48"}, + {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:811e1d37d60b47cb8126e0a929b58c046251f28117cb16fcd371eed61f66b764"}, + {file = "greenlet-2.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d38ffd0e81ba8ef347d2be0772e899c289b59ff150ebbbbe05dc61b1246eb4e0"}, + {file = "greenlet-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0109af1138afbfb8ae647e31a2b1ab030f58b21dd8528c27beaeb0093b7938a9"}, + {file = "greenlet-2.0.1-cp38-cp38-win32.whl", hash = "sha256:88c8d517e78acdf7df8a2134a3c4b964415b575d2840a2746ddb1cc6175f8608"}, + {file = "greenlet-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d6ee1aa7ab36475035eb48c01efae87d37936a8173fc4d7b10bb02c2d75dd8f6"}, + {file = "greenlet-2.0.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b1992ba9d4780d9af9726bbcef6a1db12d9ab1ccc35e5773685a24b7fb2758eb"}, + {file = "greenlet-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:b5e83e4de81dcc9425598d9469a624826a0b1211380ac444c7c791d4a2137c19"}, + {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:505138d4fa69462447a562a7c2ef723c6025ba12ac04478bc1ce2fcc279a2db5"}, + {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cce1e90dd302f45716a7715517c6aa0468af0bf38e814ad4eab58e88fc09f7f7"}, + {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e9744c657d896c7b580455e739899e492a4a452e2dd4d2b3e459f6b244a638d"}, + {file = "greenlet-2.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:662e8f7cad915ba75d8017b3e601afc01ef20deeeabf281bd00369de196d7726"}, + {file = "greenlet-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:41b825d65f31e394b523c84db84f9383a2f7eefc13d987f308f4663794d2687e"}, + {file = "greenlet-2.0.1-cp39-cp39-win32.whl", hash = "sha256:db38f80540083ea33bdab614a9d28bcec4b54daa5aff1668d7827a9fc769ae0a"}, + {file = "greenlet-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b23d2a46d53210b498e5b701a1913697671988f4bf8e10f935433f6e7c332fb6"}, + {file = "greenlet-2.0.1.tar.gz", hash = "sha256:42e602564460da0e8ee67cb6d7236363ee5e131aa15943b6670e44e5c2ed0f67"}, +] + +[package.extras] +docs = ["Sphinx", "docutils (<0.18)"] +test = ["faulthandler", "objgraph", "psutil"] + +[[package]] +name = "grpcio" +version = "1.51.3" +description = "HTTP/2-based RPC framework" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "grpcio-1.51.3-cp310-cp310-linux_armv7l.whl", hash = "sha256:f601aaeae18dab81930fb8d4f916b0da21e89bb4b5f7367ef793f46b4a76b7b0"}, + {file = "grpcio-1.51.3-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:eef0450a4b5ed11feab639bf3eb1b6e23d0efa9b911bf7b06fb60e14f5f8a585"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82b0ad8ac825d4bb31bff9f638557c045f4a6d824d84b21e893968286f88246b"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3667c06e37d6cd461afdd51cefe6537702f3d1dc5ff4cac07e88d8b4795dc16f"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3709048fe0aa23dda09b3e69849a12055790171dab9e399a72ea8f9dfbf9ac80"}, + {file = "grpcio-1.51.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:200d69857f9910f7458b39b9bcf83ee4a180591b40146ba9e49314e3a7419313"}, + {file = "grpcio-1.51.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:cd9a5e68e79c5f031500e67793048a90209711e0854a9ddee8a3ce51728de4e5"}, + {file = "grpcio-1.51.3-cp310-cp310-win32.whl", hash = "sha256:6604f614016127ae10969176bbf12eb0e03d2fb3d643f050b3b69e160d144fb4"}, + {file = "grpcio-1.51.3-cp310-cp310-win_amd64.whl", hash = "sha256:e95c7ccd4c5807adef1602005513bf7c7d14e5a41daebcf9d8d30d8bf51b8f81"}, + {file = "grpcio-1.51.3-cp311-cp311-linux_armv7l.whl", hash = "sha256:5e77ee138100f0bb55cbd147840f87ee6241dbd25f09ea7cd8afe7efff323449"}, + {file = "grpcio-1.51.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:68a7514b754e38e8de9075f7bb4dee919919515ec68628c43a894027e40ddec4"}, + {file = "grpcio-1.51.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c1b9f8afa62ff265d86a4747a2990ec5a96e4efce5d5888f245a682d66eca47"}, + {file = "grpcio-1.51.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8de30f0b417744288cec65ec8cf84b8a57995cf7f1e84ccad2704d93f05d0aae"}, + {file = "grpcio-1.51.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b69c7adc7ed60da1cb1b502853db61f453fc745f940cbcc25eb97c99965d8f41"}, + {file = "grpcio-1.51.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d81528ffe0e973dc840ec73a4132fd18b8203ad129d7410155d951a0a7e4f5d0"}, + {file = "grpcio-1.51.3-cp311-cp311-win32.whl", hash = "sha256:040eb421613b57c696063abde405916dd830203c184c9000fc8c3b3b3c950325"}, + {file = "grpcio-1.51.3-cp311-cp311-win_amd64.whl", hash = "sha256:2a8e17286c4240137d933b8ca506465472248b4ce0fe46f3404459e708b65b68"}, + {file = "grpcio-1.51.3-cp37-cp37m-linux_armv7l.whl", hash = "sha256:d5cd1389669a847555df54177b911d9ff6f17345b2a6f19388707b7a9f724c88"}, + {file = "grpcio-1.51.3-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:be1bf35ce82cdbcac14e39d5102d8de4079a1c1a6a06b68e41fcd9ef64f9dd28"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:5eed34994c095e2bf7194ffac7381c6068b057ef1e69f8f08db77771350a7566"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f9a7d88082b2a17ae7bd3c2354d13bab0453899e0851733f6afa6918373f476"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c8abbc5f837111e7bd619612eedc223c290b0903b952ce0c7b00840ea70f14"}, + {file = "grpcio-1.51.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:165b05af77e6aecb4210ae7663e25acf234ba78a7c1c157fa5f2efeb0d6ec53c"}, + {file = "grpcio-1.51.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54e36c2ee304ff15f2bfbdc43d2b56c63331c52d818c364e5b5214e5bc2ad9f6"}, + {file = "grpcio-1.51.3-cp37-cp37m-win32.whl", hash = "sha256:cd0daac21d9ef5e033a5100c1d3aa055bbed28bfcf070b12d8058045c4e821b1"}, + {file = "grpcio-1.51.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2fdd6333ce96435408565a9dbbd446212cd5d62e4d26f6a3c0feb1e3c35f1cc8"}, + {file = "grpcio-1.51.3-cp38-cp38-linux_armv7l.whl", hash = "sha256:54b0c29bdd9a3b1e1b61443ab152f060fc719f1c083127ab08d03fac5efd51be"}, + {file = "grpcio-1.51.3-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:ffaaf7e93fcb437356b5a4b23bf36e8a3d0221399ff77fd057e4bc77776a24be"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:eafbe7501a3268d05f2e450e1ddaffb950d842a8620c13ec328b501d25d2e2c3"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:881ecb34feabf31c6b3b9bbbddd1a5b57e69f805041e5a2c6c562a28574f71c4"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e860a3222139b41d430939bbec2ec9c3f6c740938bf7a04471a9a8caaa965a2e"}, + {file = "grpcio-1.51.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:49ede0528e9dac7e8a9fe30b16c73b630ddd9a576bf4b675eb6b0c53ee5ca00f"}, + {file = "grpcio-1.51.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6972b009638b40a448d10e1bc18e2223143b8a7aa20d7def0d78dd4af4126d12"}, + {file = "grpcio-1.51.3-cp38-cp38-win32.whl", hash = "sha256:5694448256e3cdfe5bd358f1574a3f2f51afa20cc834713c4b9788d60b7cc646"}, + {file = "grpcio-1.51.3-cp38-cp38-win_amd64.whl", hash = "sha256:3ea4341efe603b049e8c9a5f13c696ca37fcdf8a23ca35f650428ad3606381d9"}, + {file = "grpcio-1.51.3-cp39-cp39-linux_armv7l.whl", hash = "sha256:6c677581ce129f5fa228b8f418cee10bd28dd449f3a544ea73c8ba590ee49d0b"}, + {file = "grpcio-1.51.3-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:30e09b5e0531685e176f49679b6a3b190762cc225f4565e55a899f5e14b3aa62"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:c831f31336e81243f85b6daff3e5e8a123302ce0ea1f2726ad752fd7a59f3aee"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2cd2e4cefb724cab1ba2df4b7535a9980531b9ec51b4dbb5f137a1f3a3754ef0"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7a0d0bf44438869d307f85a54f25a896ad6b4b0ca12370f76892ad732928d87"}, + {file = "grpcio-1.51.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c02abd55409bfb293371554adf6a4401197ec2133dd97727c01180889014ba4d"}, + {file = "grpcio-1.51.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2f8ff75e61e1227ba7a3f16b2eadbcc11d0a54096d52ab75a6b88cfbe56f55d1"}, + {file = "grpcio-1.51.3-cp39-cp39-win32.whl", hash = "sha256:6c99a73a6260bdf844b2e5ddad02dcd530310f80e1fa72c300fa19c1c7496962"}, + {file = "grpcio-1.51.3-cp39-cp39-win_amd64.whl", hash = "sha256:22bdfac4f7f27acdd4da359b5e7e1973dc74bf1ed406729b07d0759fde2f064b"}, + {file = "grpcio-1.51.3.tar.gz", hash = "sha256:be7b2265b7527bb12109a7727581e274170766d5b3c9258d4e466f4872522d7a"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.51.3)"] + +[[package]] +name = "grpcio-tools" +version = "1.48.2" +description = "Protobuf code generator for gRPC" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "grpcio-tools-1.48.2.tar.gz", hash = "sha256:8902a035708555cddbd61b5467cea127484362decc52de03f061a1a520fe90cd"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-linux_armv7l.whl", hash = "sha256:92acc3e10ba2b0dcb90a88ae9fe1cc0ffba6868545207e4ff20ca95284f8e3c9"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e5bb396d63495667d4df42e506eed9d74fc9a51c99c173c04395fe7604c848f1"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:84a84d601a238572d049d3108e04fe4c206536e81076d56e623bd525a1b38def"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70564521e86a0de35ea9ac6daecff10cb46860aec469af65869974807ce8e98b"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdbbe63f6190187de5946891941629912ac8196701ed2253fa91624a397822ec"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae56f133b05b7e5d780ef7e032dd762adad7f3dc8f64adb43ff5bfabd659f435"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0feb4f2b777fa6377e977faa89c26359d4f31953de15e035505b92f41aa6906"}, + {file = 
"grpcio_tools-1.48.2-cp310-cp310-win32.whl", hash = "sha256:80f450272316ca0924545f488c8492649ca3aeb7044d4bf59c426dcdee527f7c"}, + {file = "grpcio_tools-1.48.2-cp310-cp310-win_amd64.whl", hash = "sha256:21ff50e321736eba22210bf9b94e05391a9ac345f26e7df16333dc75d63e74fb"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-linux_armv7l.whl", hash = "sha256:d598ccde6338b2cfbb3124f34c95f03394209013f9b1ed4a5360a736853b1c27"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:a43d26714933f23de93ea0bf9c86c66a6ede709b8ca32e357f9e2181703e64ae"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-manylinux_2_17_aarch64.whl", hash = "sha256:55fdebc73fb580717656b1bafa4f8eca448726a7aa22726a6c0a7895d2f0f088"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8588819b22d0de3aa1951e1991cc3e4b9aa105eecf6e3e24eb0a2fc8ab958b3e"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9771d4d317dca029dfaca7ec9282d8afe731c18bc536ece37fd39b8a974cc331"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d886a9e052a038642b3af5d18e6f2085d1656d9788e202dc23258cf3a751e7ca"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d77e8b1613876e0d8fd17709509d4ceba13492816426bd156f7e88a4c47e7158"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-win32.whl", hash = "sha256:dcaaecdd5e847de5c1d533ea91522bf56c9e6b2dc98cdc0d45f0a1c26e846ea2"}, + {file = "grpcio_tools-1.48.2-cp36-cp36m-win_amd64.whl", hash = "sha256:0119aabd9ceedfdf41b56b9fdc8284dd85a7f589d087f2694d743f346a368556"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-linux_armv7l.whl", hash = "sha256:189be2a9b672300ca6845d94016bdacc052fdbe9d1ae9e85344425efae2ff8ef"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:9443f5c30bac449237c3cf99da125f8d6e6c01e17972bc683ee73b75dea95573"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:e0403e095b343431195db1305248b50019ad55d3dd310254431af87e14ef83a2"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5410d6b601d1404835e34466bd8aee37213489b36ee1aad2276366e265ff29d4"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be91b7c7056ff9ee48b1eccd4a2840b0126230803a5e09dfc082a5b16a91c1"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:516eedd5eb7af6326050bc2cfceb3a977b9cc1144f283c43cc4956905285c912"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d18599ab572b2f15a8f3db49503272d1bb4fcabb4b4d1214ef03aca1816b20a0"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-win32.whl", hash = "sha256:d18ef2adc05a8ef9e58ac46357f6d4ce7e43e077c7eda0a4425773461f9d0e6e"}, + {file = "grpcio_tools-1.48.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d9753944e5a6b6b78b76ce9d2ae0fe3f748008c1849deb7fadcb64489d6553b"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-linux_armv7l.whl", hash = "sha256:3c8749dca04a8d302862ceeb1dfbdd071ee13b281395975f24405a347e5baa57"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:7307dd2408b82ea545ae63502ec03036b025f449568556ea9a056e06129a7a4e"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:072234859f6069dc43a6be8ad6b7d682f4ba1dc2e2db2ebf5c75f62eee0f6dfb"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6cc298fbfe584de8876a85355efbcf796dfbcfac5948c9560f5df82e79336e2a"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f75973a42c710999acd419968bc79f00327e03e855bbe82c6529e003e49af660"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f766050e491d0b3203b6b85638015f543816a2eb7d089fc04e86e00f6de0e31d"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8e0d74403484eb77e8df2566a64b8b0b484b5c87903678c381634dd72f252d5e"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-win32.whl", hash = "sha256:cb75bac0cd43858cb759ef103fe68f8c540cb58b63dda127e710228fec3007b8"}, + {file = "grpcio_tools-1.48.2-cp38-cp38-win_amd64.whl", hash = "sha256:cabc8b0905cedbc3b2b7b2856334fa35cce3d4bc79ae241cacd8cca8940a5c85"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-linux_armv7l.whl", hash = "sha256:e712a6d00606ad19abdeae852a7e521d6f6d0dcea843708fecf3a38be16a851e"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:e7e7668f89fd598c5469bb58e16bfd12b511d9947ccc75aec94da31f62bc3758"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:a415fbec67d4ff7efe88794cbe00cf548d0f0a5484cceffe0a0c89d47694c491"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d96e96ae7361aa51c9cd9c73b677b51f691f98df6086860fcc3c45852d96b0b0"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e20d7885a40e68a2bda92908acbabcdf3c14dd386c3845de73ba139e9df1f132"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8a5614251c46da07549e24f417cf989710250385e9d80deeafc53a0ee7df6325"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ace0035766fe01a1b096aa050be9f0a9f98402317e7aeff8bfe55349be32a407"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-win32.whl", hash = "sha256:4fa4300b1be59b046492ed3c5fdb59760bc6433f44c08f50de900f9552ec7461"}, + {file = "grpcio_tools-1.48.2-cp39-cp39-win_amd64.whl", hash = "sha256:0fb6c1c1e56eb26b224adc028a4204b6ad0f8b292efa28067dff273bbc8b27c4"}, +] + +[package.dependencies] +grpcio = ">=1.48.2" +protobuf = ">=3.12.0,<4.0dev" +setuptools = "*" + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +category = "main" +optional = true +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "h5py" +version = "3.8.0" +description = "Read and write HDF5 files from Python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "h5py-3.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:533d7dad466ddb7e3b30af274b630eb7c1a6e4ddf01d1c373a0334dc2152110a"}, + 
{file = "h5py-3.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c873ba9fd4fa875ad62ce0e4891725e257a8fe7f5abdbc17e51a5d54819be55c"}, + {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98a240cd4c1bfd568aaa52ec42d263131a2582dab82d74d3d42a0d954cac12be"}, + {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3389b63222b1c7a158bb7fe69d11ca00066740ec5574596d47a2fe5317f563a"}, + {file = "h5py-3.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:7f3350fc0a8407d668b13247861c2acd23f7f5fe7d060a3ad9b0820f5fcbcae0"}, + {file = "h5py-3.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db03e3f2c716205fbdabb34d0848459840585225eb97b4f08998c743821ca323"}, + {file = "h5py-3.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36761693efbe53df179627a775476dcbc37727d6e920958277a7efbc18f1fb73"}, + {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a506fc223def428f4329e7e1f9fe1c8c593eab226e7c0942c8d75308ad49950"}, + {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33b15aae79e9147aebe1d0e54099cbcde8d65e3e227cd5b59e49b1272aa0e09d"}, + {file = "h5py-3.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:9f6f6ffadd6bfa9b2c5b334805eb4b19ca0a5620433659d8f7fb86692c40a359"}, + {file = "h5py-3.8.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8f55d9c6c84d7d09c79fb85979e97b81ec6071cc776a97eb6b96f8f6ec767323"}, + {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b685453e538b2b5934c58a644ac3f3b3d0cec1a01b6fb26d57388e9f9b674ad0"}, + {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377865821fe80ad984d003723d6f8890bd54ceeb5981b43c0313b9df95411b30"}, + {file = "h5py-3.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0fef76e10b9216657fa37e7edff6d8be0709b25bd5066474c229b56cf0098df9"}, + {file = "h5py-3.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:26ffc344ec9984d2cd3ca0265007299a8bac8d85c1ad48f4639d8d3aed2af171"}, + {file = "h5py-3.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bacaa1c16810dd2b3e4417f8e730971b7c4d53d234de61fe4a918db78e80e1e4"}, + {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bae730580ae928de409d63cbe4fdca4c82c3ad2bed30511d19d34e995d63c77e"}, + {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47f757d1b76f0ecb8aa0508ec8d1b390df67a8b67ee2515dc1b046f3a1596ea"}, + {file = "h5py-3.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f891b17e3a3e974e93f9e34e7cca9f530806543571ce078998676a555837d91d"}, + {file = "h5py-3.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:290e00fa2de74a10688d1bac98d5a9cdd43f14f58e562c580b5b3dfbd358ecae"}, + {file = "h5py-3.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:03890b1c123d024fb0239a3279737d5432498c1901c354f8b10d8221d1d16235"}, + {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7865de06779b14d98068da387333ad9bf2756b5b579cc887fac169bc08f87c3"}, + {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49bc857635f935fa30e92e61ac1e87496df8f260a6945a3235e43a9890426866"}, + {file = "h5py-3.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5fd2252d1fc364ba0e93dd0b7089f4906b66805cb4e6aca7fa8874ac08649647"}, + {file = "h5py-3.8.0.tar.gz", hash = "sha256:6fead82f0c4000cf38d53f9c030780d81bfa0220218aee13b90b7701c937d95f"}, 
+] + +[package.dependencies] +numpy = ">=1.14.5" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +category = "main" +optional = true +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "httpcore" +version = "0.16.3" +description = "A minimal low-level HTTP client." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "httpcore-0.16.3-py3-none-any.whl", hash = "sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0"}, + {file = "httpcore-0.16.3.tar.gz", hash = "sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb"}, +] + +[package.dependencies] +anyio = ">=3.0,<5.0" +certifi = "*" +h11 = ">=0.13,<0.15" +sniffio = ">=1.0.0,<2.0.0" + +[package.extras] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] + +[[package]] +name = "httplib2" +version = "0.22.0" +description = "A comprehensive HTTP client library." +category = "main" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, + {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, +] + +[package.dependencies] +pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} + +[[package]] +name = "httpx" +version = "0.23.3" +description = "The next generation HTTP client." 
+category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "httpx-0.23.3-py3-none-any.whl", hash = "sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6"}, + {file = "httpx-0.23.3.tar.gz", hash = "sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9"}, +] + +[package.dependencies] +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = ">=0.15.0,<0.17.0" +rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<13)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] + +[[package]] +name = "huggingface-hub" +version = "0.13.3" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "main" +optional = true +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.13.3-py3-none-any.whl", hash = "sha256:f73a298a55028575334f9670d86b8171a4dd890b320315f3ad28a20b9eb3b5bc"}, + {file = "huggingface_hub-0.13.3.tar.gz", hash = "sha256:1f95f65c5e7aa76728701402f55b697ee8a8b50234adda91fbdbb81038fbcd21"}, +] + +[package.dependencies] +filelock = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"] +torch = ["torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "humbug" +version = "0.3.0" +description = "Humbug: Do you build developer tools? Humbug helps you know your users." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "humbug-0.3.0-py3-none-any.whl", hash = "sha256:98aecb6d325f292db0de32778494018434e38893487e4286dd512606ed1e6aeb"}, + {file = "humbug-0.3.0.tar.gz", hash = "sha256:11c9daf9fad7281f6db197144a44e4a0283912d8647f84ca7c84e0e67b0d70cf"}, +] + +[package.dependencies] +requests = "*" + +[package.extras] +dev = ["black", "mypy", "types-dataclasses", "types-pkg-resources", "types-psutil", "types-requests", "wheel"] +distribute = ["setuptools", "twine", "wheel"] +profile = ["GPUtil", "psutil", "types-psutil"] + +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +category = "main" +optional = true +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] + +[[package]] +name = "importlib-metadata" +version = "6.1.0" +description = "Read metadata from Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"}, + {file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, + {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx 
(>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "ipykernel" +version = "6.22.0" +description = "IPython Kernel for Jupyter" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ipykernel-6.22.0-py3-none-any.whl", hash = "sha256:1ae6047c1277508933078163721bbb479c3e7292778a04b4bacf0874550977d6"}, + {file = "ipykernel-6.22.0.tar.gz", hash = "sha256:302558b81f1bc22dc259fb2a0c5c7cf2f4c0bdb21b50484348f7bafe7fb71421"}, +] + +[package.dependencies] +appnope = {version = "*", markers = "platform_system == \"Darwin\""} +comm = ">=0.1.1" +debugpy = ">=1.6.5" +ipython = ">=7.23.1" +jupyter-client = ">=6.1.12" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +matplotlib-inline = ">=0.1" +nest-asyncio = "*" +packaging = "*" +psutil = "*" +pyzmq = ">=20" +tornado = ">=6.1" +traitlets = ">=5.4.0" + +[package.extras] +cov = ["coverage[toml]", "curio", "matplotlib", "pytest-cov", "trio"] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "trio"] +pyqt5 = ["pyqt5"] +pyside6 = ["pyside6"] +test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "ipython" +version = "8.11.0" +description = "IPython: Productive Interactive Computing" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ipython-8.11.0-py3-none-any.whl", hash = "sha256:5b54478e459155a326bf5f42ee4f29df76258c0279c36f21d71ddb560f88b156"}, + {file = "ipython-8.11.0.tar.gz", hash = "sha256:735cede4099dbc903ee540307b9171fbfef4aa75cfcacc5a273b2cda2f02be04"}, +] + +[package.dependencies] +appnope = {version = "*", markers = "sys_platform == \"darwin\""} +backcall = "*" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +decorator = "*" +jedi = ">=0.16" +matplotlib-inline = "*" +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +pickleshare = "*" +prompt-toolkit = ">=3.0.30,<3.0.37 || >3.0.37,<3.1.0" +pygments = ">=2.4.0" +stack-data = "*" +traitlets = ">=5" + +[package.extras] +all = ["black", "curio", "docrepr", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.21)", "pandas", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] +black = ["black"] +doc = ["docrepr", "ipykernel", "matplotlib", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["ipywidgets", "notebook"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] +test-extra = ["curio", 
"matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pandas", "pytest (<7.1)", "pytest-asyncio", "testpath", "trio"] + +[[package]] +name = "ipython-genutils" +version = "0.2.0" +description = "Vestigial utilities from IPython" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, + {file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"}, +] + +[[package]] +name = "ipywidgets" +version = "8.0.5" +description = "Jupyter interactive widgets" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ipywidgets-8.0.5-py3-none-any.whl", hash = "sha256:a6e5c0392f86207fae304688a670afb26b2fd819592cfc0812777c2fdf22dbad"}, + {file = "ipywidgets-8.0.5.tar.gz", hash = "sha256:89a1930b9ef255838571a2415cc4a15e824e4316b8f067805d1d03b98b6a8c5f"}, +] + +[package.dependencies] +ipython = ">=6.1.0" +jupyterlab-widgets = ">=3.0,<4.0" +traitlets = ">=4.3.1" +widgetsnbextension = ">=4.0,<5.0" + +[package.extras] +test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"] + +[[package]] +name = "isoduration" +version = "20.11.0" +description = "Operations with ISO 8601 durations" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042"}, + {file = "isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9"}, +] + +[package.dependencies] +arrow = ">=0.15.0" + +[[package]] +name = "jaraco-context" +version = "4.3.0" +description = "Context managers by jaraco" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "jaraco.context-4.3.0-py3-none-any.whl", hash = "sha256:5d9e95ca0faa78943ed66f6bc658dd637430f16125d86988e77844c741ff2f11"}, + {file = "jaraco.context-4.3.0.tar.gz", hash = "sha256:4dad2404540b936a20acedec53355bdaea223acb88fd329fa6de9261c941566e"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[[package]] +name = "jedi" +version = "0.18.2" +description = "An autocompletion tool for Python that can be used for text editors." 
+category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "jedi-0.18.2-py2.py3-none-any.whl", hash = "sha256:203c1fd9d969ab8f2119ec0a3342e0b49910045abe6af0a3ae83a5764d54639e"}, + {file = "jedi-0.18.2.tar.gz", hash = "sha256:bae794c30d07f6d910d32a7048af09b5a39ed740918da923c6b780790ebac612"}, +] + +[package.dependencies] +parso = ">=0.8.0,<0.9.0" + +[package.extras] +docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "joblib" +version = "1.2.0" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "joblib-1.2.0-py3-none-any.whl", hash = "sha256:091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385"}, + {file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"}, +] + +[[package]] +name = "jsonlines" +version = "3.1.0" +description = "Library with helpers for the jsonlines file format" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "jsonlines-3.1.0-py3-none-any.whl", hash = "sha256:632f5e38f93dfcb1ac8c4e09780b92af3a55f38f26e7c47ae85109d420b6ad39"}, + {file = "jsonlines-3.1.0.tar.gz", hash = "sha256:2579cb488d96f815b0eb81629e3e6b0332da0962a18fa3532958f7ba14a5c37f"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + +[[package]] +name = "jsonpointer" +version = "2.3" +description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "jsonpointer-2.3-py2.py3-none-any.whl", hash = "sha256:51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9"}, + {file = "jsonpointer-2.3.tar.gz", hash = "sha256:97cba51526c829282218feb99dab1b1e6bdf8efd1c43dc9d57be093c0d69c99a"}, +] + +[[package]] +name = "jsonschema" +version = "4.17.3" 
+description = "An implementation of JSON Schema validation for Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"}, + {file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"}, +] + +[package.dependencies] +attrs = ">=17.4.0" +fqdn = {version = "*", optional = true, markers = "extra == \"format-nongpl\""} +idna = {version = "*", optional = true, markers = "extra == \"format-nongpl\""} +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +isoduration = {version = "*", optional = true, markers = "extra == \"format-nongpl\""} +jsonpointer = {version = ">1.13", optional = true, markers = "extra == \"format-nongpl\""} +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} +pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" +rfc3339-validator = {version = "*", optional = true, markers = "extra == \"format-nongpl\""} +rfc3986-validator = {version = ">0.1.0", optional = true, markers = "extra == \"format-nongpl\""} +uri-template = {version = "*", optional = true, markers = "extra == \"format-nongpl\""} +webcolors = {version = ">=1.11", optional = true, markers = "extra == \"format-nongpl\""} + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] + +[[package]] +name = "jupyter" +version = "1.0.0" +description = "Jupyter metapackage. Install all the Jupyter components in one go." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "jupyter-1.0.0-py2.py3-none-any.whl", hash = "sha256:5b290f93b98ffbc21c0c7e749f054b3267782166d72fa5e3ed1ed4eaf34a2b78"}, + {file = "jupyter-1.0.0.tar.gz", hash = "sha256:d9dc4b3318f310e34c82951ea5d6683f67bed7def4b259fafbfe4f1beb1d8e5f"}, + {file = "jupyter-1.0.0.zip", hash = "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7"}, +] + +[package.dependencies] +ipykernel = "*" +ipywidgets = "*" +jupyter-console = "*" +nbconvert = "*" +notebook = "*" +qtconsole = "*" + +[[package]] +name = "jupyter-cache" +version = "0.5.0" +description = "A defined interface for working with a cache of jupyter notebooks." 
+category = "dev" +optional = false +python-versions = "~=3.7" +files = [ + {file = "jupyter-cache-0.5.0.tar.gz", hash = "sha256:87408030a4c8c14fe3f8fe62e6ceeb24c84e544c7ced20bfee45968053d07801"}, + {file = "jupyter_cache-0.5.0-py3-none-any.whl", hash = "sha256:642e434b9b75c4b94dc8346eaf5a639c8926a0673b87e5e8ef6460d5cf2c9516"}, +] + +[package.dependencies] +attrs = "*" +click = "*" +importlib-metadata = "*" +nbclient = ">=0.2,<0.6" +nbformat = "*" +pyyaml = "*" +sqlalchemy = ">=1.3.12,<1.5" +tabulate = "*" + +[package.extras] +cli = ["click-log"] +code-style = ["pre-commit (>=2.12,<3.0)"] +rtd = ["jupytext", "myst-nb (>=0.12.3,<0.13.0)", "nbdime", "sphinx-book-theme (>=0.1.1,<0.2.0)", "sphinx-copybutton"] +testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbformat (>=5.1)", "numpy", "pandas", "pytest (>=6,<7)", "pytest-cov", "pytest-regressions", "sympy"] + +[[package]] +name = "jupyter-client" +version = "8.1.0" +description = "Jupyter protocol implementation and client libraries" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyter_client-8.1.0-py3-none-any.whl", hash = "sha256:d5b8e739d7816944be50f81121a109788a3d92732ecf1ad1e4dadebc948818fe"}, + {file = "jupyter_client-8.1.0.tar.gz", hash = "sha256:3fbab64100a0dcac7701b1e0f1a4412f1ccb45546ff2ad9bc4fcbe4e19804811"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""} +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +python-dateutil = ">=2.8.2" +pyzmq = ">=23.0" +tornado = ">=6.2" +traitlets = ">=5.3" + +[package.extras] +docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] +test = ["codecov", "coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] + +[[package]] +name = "jupyter-console" +version = "6.6.3" +description = "Jupyter terminal console" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jupyter_console-6.6.3-py3-none-any.whl", hash = "sha256:309d33409fcc92ffdad25f0bcdf9a4a9daa61b6f341177570fdac03de5352485"}, + {file = "jupyter_console-6.6.3.tar.gz", hash = "sha256:566a4bf31c87adbfadf22cdf846e3069b59a71ed5da71d6ba4d8aaad14a53539"}, +] + +[package.dependencies] +ipykernel = ">=6.14" +ipython = "*" +jupyter-client = ">=7.0.0" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +prompt-toolkit = ">=3.0.30" +pygments = "*" +pyzmq = ">=17" +traitlets = ">=5.4" + +[package.extras] +test = ["flaky", "pexpect", "pytest"] + +[[package]] +name = "jupyter-core" +version = "5.3.0" +description = "Jupyter core package. A base package on which Jupyter projects rely." 
+category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyter_core-5.3.0-py3-none-any.whl", hash = "sha256:d4201af84559bc8c70cead287e1ab94aeef3c512848dde077b7684b54d67730d"}, + {file = "jupyter_core-5.3.0.tar.gz", hash = "sha256:6db75be0c83edbf1b7c9f91ec266a9a24ef945da630f3120e1a0046dc13713fc"}, +] + +[package.dependencies] +platformdirs = ">=2.5" +pywin32 = {version = ">=300", markers = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\""} +traitlets = ">=5.3" + +[package.extras] +docs = ["myst-parser", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] +test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "jupyter-events" +version = "0.6.3" +description = "Jupyter Event System library" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jupyter_events-0.6.3-py3-none-any.whl", hash = "sha256:57a2749f87ba387cd1bfd9b22a0875b889237dbf2edc2121ebb22bde47036c17"}, + {file = "jupyter_events-0.6.3.tar.gz", hash = "sha256:9a6e9995f75d1b7146b436ea24d696ce3a35bfa8bfe45e0c33c334c79464d0b3"}, +] + +[package.dependencies] +jsonschema = {version = ">=3.2.0", extras = ["format-nongpl"]} +python-json-logger = ">=2.0.4" +pyyaml = ">=5.3" +rfc3339-validator = "*" +rfc3986-validator = ">=0.1.1" +traitlets = ">=5.3" + +[package.extras] +cli = ["click", "rich"] +docs = ["jupyterlite-sphinx", "myst-parser", "pydata-sphinx-theme", "sphinxcontrib-spelling"] +test = ["click", "coverage", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "pytest-console-scripts", "pytest-cov", "rich"] + +[[package]] +name = "jupyter-server" +version = "2.5.0" +description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyter_server-2.5.0-py3-none-any.whl", hash = "sha256:e6bc1e9e96d7c55b9ce9699ff6cb9a910581fe7349e27c40389acb67632e24c0"}, + {file = "jupyter_server-2.5.0.tar.gz", hash = "sha256:9fde612791f716fd34d610cd939704a9639643744751ba66e7ee8fdc9cead07e"}, +] + +[package.dependencies] +anyio = ">=3.1.0" +argon2-cffi = "*" +jinja2 = "*" +jupyter-client = ">=7.4.4" +jupyter-core = ">=4.12,<5.0.0 || >=5.1.0" +jupyter-events = ">=0.4.0" +jupyter-server-terminals = "*" +nbconvert = ">=6.4.4" +nbformat = ">=5.3.0" +packaging = "*" +prometheus-client = "*" +pywinpty = {version = "*", markers = "os_name == \"nt\""} +pyzmq = ">=24" +send2trash = "*" +terminado = ">=0.8.3" +tornado = ">=6.2.0" +traitlets = ">=5.6.0" +websocket-client = "*" + +[package.extras] +docs = ["docutils (<0.20)", "ipykernel", "jinja2", "jupyter-client", "jupyter-server", "mistune (<1.0.0)", "myst-parser", "nbformat", "prometheus-client", "pydata-sphinx-theme", "send2trash", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-openapi", "sphinxcontrib-spelling", "sphinxemoji", "tornado", "typing-extensions"] +test = ["ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-scripts", "pytest-jupyter[server] (>=0.4)", "pytest-timeout", "requests"] + +[[package]] +name = "jupyter-server-terminals" +version = "0.4.4" +description = "A Jupyter Server Extension Providing Terminals." 
+category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyter_server_terminals-0.4.4-py3-none-any.whl", hash = "sha256:75779164661cec02a8758a5311e18bb8eb70c4e86c6b699403100f1585a12a36"}, + {file = "jupyter_server_terminals-0.4.4.tar.gz", hash = "sha256:57ab779797c25a7ba68e97bcfb5d7740f2b5e8a83b5e8102b10438041a7eac5d"}, +] + +[package.dependencies] +pywinpty = {version = ">=2.0.3", markers = "os_name == \"nt\""} +terminado = ">=0.8.3" + +[package.extras] +docs = ["jinja2", "jupyter-server", "mistune (<3.0)", "myst-parser", "nbformat", "packaging", "pydata-sphinx-theme", "sphinxcontrib-github-alt", "sphinxcontrib-openapi", "sphinxcontrib-spelling", "sphinxemoji", "tornado"] +test = ["coverage", "jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-cov", "pytest-jupyter[server] (>=0.5.3)", "pytest-timeout"] + +[[package]] +name = "jupyterlab-pygments" +version = "0.2.2" +description = "Pygments theme using JupyterLab CSS variables" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jupyterlab_pygments-0.2.2-py2.py3-none-any.whl", hash = "sha256:2405800db07c9f770863bcf8049a529c3dd4d3e28536638bd7c1c01d2748309f"}, + {file = "jupyterlab_pygments-0.2.2.tar.gz", hash = "sha256:7405d7fde60819d905a9fa8ce89e4cd830e318cdad22a0030f7a901da705585d"}, +] + +[[package]] +name = "jupyterlab-widgets" +version = "3.0.6" +description = "Jupyter interactive widgets for JupyterLab" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jupyterlab_widgets-3.0.6-py3-none-any.whl", hash = "sha256:e95d08adf4f9c37a57da5fff8a65d00480199885fd2ecd2583fd9560b594b4e9"}, + {file = "jupyterlab_widgets-3.0.6.tar.gz", hash = "sha256:a464d68a7b9ebabdc135196389381412a39503d89302be0867d0ff3b2428ebb8"}, +] + +[[package]] +name = "keras" +version = "2.11.0" +description = "Deep learning for humans." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "keras-2.11.0-py2.py3-none-any.whl", hash = "sha256:38c6fff0ea9a8b06a2717736565c92a73c8cd9b1c239e7125ccb188b7848f65e"}, +] + +[[package]] +name = "langcodes" +version = "3.3.0" +description = "Tools for labeling human languages with IETF language tags" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "langcodes-3.3.0-py3-none-any.whl", hash = "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69"}, + {file = "langcodes-3.3.0.tar.gz", hash = "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6"}, +] + +[package.extras] +data = ["language-data (>=1.1,<2.0)"] + +[[package]] +name = "libclang" +version = "16.0.0" +description = "Clang Python Bindings, mirrored from the official LLVM repo: https://github.com/llvm/llvm-project/tree/main/clang/bindings/python, to make the installation process easier." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "libclang-16.0.0-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:65258a6bb3e7dc31dc9b26f8d42f53c9d3b959643ade291fcd1aef4855303ca6"}, + {file = "libclang-16.0.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:af55a4aa86fdfe6b2ec68bc8cfe5fdac6c448d591ca7648be86ca17099b41ca8"}, + {file = "libclang-16.0.0-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:a043138caaf2cb076ebb060c6281ec95612926645d425c691991fc9df00e8a24"}, + {file = "libclang-16.0.0-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:eb59652cb0559c0e71784ff4c8ba24c14644becc907b1446563ecfaa622d523b"}, + {file = "libclang-16.0.0-py2.py3-none-manylinux2014_armv7l.whl", hash = "sha256:7b6686b67a0daa84b4c614bcc119578329fc4fbb52b919565b7376b507c4793b"}, + {file = "libclang-16.0.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2adce42ae652f312245b8f4eda6f30b4076fb61f7619f2dfd0a0c31dee4c32b9"}, + {file = "libclang-16.0.0-py2.py3-none-win_amd64.whl", hash = "sha256:ee20bf93e3dd330f71fc50cdbf13b92ced0aec8e540be64251db53502a9b33f7"}, + {file = "libclang-16.0.0-py2.py3-none-win_arm64.whl", hash = "sha256:bf4628fc4da7a1dd06a244f9b8e121c5ec68076a763c59d6b13cbb103acc935b"}, +] + +[[package]] +name = "linkchecker" +version = "10.2.1" +description = "check links in web documents or full websites" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "LinkChecker-10.2.1-py3-none-any.whl", hash = "sha256:5438496290826f5e2f4a2041f11482608378150b6c2d05ca8f94f460b7cb7c9e"}, + {file = "LinkChecker-10.2.1.tar.gz", hash = "sha256:97eae069ccfe892a18e380c7f4762dfe3f352e87c442ef6124e8c60b887cddcd"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.8.1" +dnspython = ">=2.0" +requests = ">=2.20" + +[[package]] +name = "livereload" +version = "2.6.3" +description = "Python LiveReload is an awesome tool for web developers" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "livereload-2.6.3-py2.py3-none-any.whl", hash = "sha256:ad4ac6f53b2d62bb6ce1a5e6e96f1f00976a32348afedcb4b6d68df2a1d346e4"}, + {file = "livereload-2.6.3.tar.gz", hash = "sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869"}, +] + +[package.dependencies] +six = "*" +tornado = {version = "*", markers = "python_version > \"2.7\""} + +[[package]] +name = "loguru" +version = "0.6.0" +description = "Python logging made (stupidly) simple" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"}, + {file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils (==0.16)", "flake8 (>=3.7.7)", "isort (>=5.1.1)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "tox (>=3.9.0)"] + +[[package]] +name = "manifest-ml" +version = "0.0.1" +description = "Manifest for Prompt Programming Foundation Models." 
+category = "main" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "manifest-ml-0.0.1.tar.gz", hash = "sha256:f828faf7de41fad5318254beec08acdf5142196e0e22203a4047412c2d3127a0"}, + {file = "manifest_ml-0.0.1-py2.py3-none-any.whl", hash = "sha256:fc4e62e706fd767fd8851d91051fdb71bc79b2df9c66f5879736c46d8163a316"}, +] + +[package.dependencies] +dill = ">=0.3.5" +redis = ">=4.3.1" +requests = ">=2.27.1" +sqlitedict = ">=2.0.0" +tqdm = ">=4.64.0" + +[package.extras] +all = ["Flask (>=2.1.2)", "accelerate (>=0.10.0)", "autopep8 (>=1.6.0)", "black (>=22.3.0)", "docformatter (>=1.4)", "flake8 (>=4.0.0)", "flake8-docstrings (>=1.6.0)", "isort (>=5.9.3)", "mypy (>=0.950)", "nbsphinx (>=0.8.0)", "pep8-naming (>=0.12.1)", "pre-commit (>=2.14.0)", "pytest (>=7.0.0)", "pytest-cov (>=3.0.0)", "python-dotenv (>=0.20.0)", "recommonmark (>=0.7.1)", "sphinx-autobuild", "sphinx-rtd-theme (>=0.5.1)", "torch (>=1.8.0)", "transformers (>=4.20.0)", "twine", "types-PyYAML (>=6.0.7)", "types-protobuf (>=3.19.21)", "types-python-dateutil (>=2.8.16)", "types-redis (>=4.2.6)", "types-requests (>=2.27.29)", "types-setuptools (>=57.4.17)"] +api = ["Flask (>=2.1.2)", "accelerate (>=0.10.0)", "torch (>=1.8.0)", "transformers (>=4.20.0)"] +dev = ["autopep8 (>=1.6.0)", "black (>=22.3.0)", "docformatter (>=1.4)", "flake8 (>=4.0.0)", "flake8-docstrings (>=1.6.0)", "isort (>=5.9.3)", "mypy (>=0.950)", "nbsphinx (>=0.8.0)", "pep8-naming (>=0.12.1)", "pre-commit (>=2.14.0)", "pytest (>=7.0.0)", "pytest-cov (>=3.0.0)", "python-dotenv (>=0.20.0)", "recommonmark (>=0.7.1)", "sphinx-autobuild", "sphinx-rtd-theme (>=0.5.1)", "twine", "types-PyYAML (>=6.0.7)", "types-protobuf (>=3.19.21)", "types-python-dateutil (>=2.8.16)", "types-redis (>=4.2.6)", "types-requests (>=2.27.29)", "types-setuptools (>=57.4.17)"] + +[[package]] +name = "markdown" +version = "3.4.3" +description = "Python implementation of John Gruber's Markdown." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "Markdown-3.4.3-py3-none-any.whl", hash = "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2"}, + {file = "Markdown-3.4.3.tar.gz", hash = "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"}, +] + +[package.extras] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markdown-it-py" +version = "2.2.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"}, + {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "markupsafe" +version = "2.1.2" +description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-win32.whl", hash = "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603"}, + {file = "MarkupSafe-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-win32.whl", hash = "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625"}, + {file = "MarkupSafe-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a"}, + {file = 
"MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859"}, + {file = "MarkupSafe-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-win32.whl", hash = "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2"}, + {file = "MarkupSafe-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-win32.whl", hash = "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7"}, + {file = "MarkupSafe-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed"}, + {file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"}, +] + +[[package]] +name = "marshmallow" +version = "3.19.0" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "marshmallow-3.19.0-py3-none-any.whl", hash = "sha256:93f0958568da045b0021ec6aeb7ac37c81bfcccbb9a0e7ed8559885070b3a19b"}, + {file = "marshmallow-3.19.0.tar.gz", hash = "sha256:90032c0fd650ce94b6ec6dc8dfeb0e3ff50c144586462c389b81a07205bedb78"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==5.0.4)", "flake8-bugbear (==22.10.25)", "mypy (==0.990)", "pre-commit (>=2.4,<3.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.12)", "autodocsumm (==0.2.9)", "sphinx (==5.3.0)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==5.0.4)", "flake8-bugbear (==22.10.25)", "mypy (==0.990)", "pre-commit (>=2.4,<3.0)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "marshmallow-enum" +version = "1.5.1" +description = "Enum field for Marshmallow" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"}, + {file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"}, +] + +[package.dependencies] +marshmallow = ">=2.0.0" + +[[package]] +name = "matplotlib-inline" +version = "0.1.6" +description = "Inline Matplotlib backend for Jupyter" +category = "dev" +optional = false +python-versions = ">=3.5" +files = [ + {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, + {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"}, +] + +[package.dependencies] +traitlets = "*" + +[[package]] +name = "mdit-py-plugins" +version = "0.3.5" +description = "Collection of plugins for markdown-it-py" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdit-py-plugins-0.3.5.tar.gz", hash = "sha256:eee0adc7195e5827e17e02d2a258a2ba159944a0748f59c5099a4a27f78fcf6a"}, + {file = "mdit_py_plugins-0.3.5-py3-none-any.whl", hash = "sha256:ca9a0714ea59a24b2b044a1831f48d817dd0c817e84339f20e7889f392d77c4e"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<3.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["attrs", "myst-parser (>=0.16.1,<0.17.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = 
"Markdown URL utilities" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mistune" +version = "2.0.5" +description = "A sane Markdown parser with useful plugins and renderers" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "mistune-2.0.5-py2.py3-none-any.whl", hash = "sha256:bad7f5d431886fcbaf5f758118ecff70d31f75231b34024a1341120340a65ce8"}, + {file = "mistune-2.0.5.tar.gz", hash = "sha256:0246113cb2492db875c6be56974a7c893333bf26cd92891c85f63151cee09d34"}, +] + +[[package]] +name = "more-itertools" +version = "9.1.0" +description = "More routines for operating on iterables, beyond itertools" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "more-itertools-9.1.0.tar.gz", hash = "sha256:cabaa341ad0389ea83c17a94566a53ae4c9d07349861ecb14dc6d0345cf9ac5d"}, + {file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"}, +] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = 
"multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = 
"multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = 
"multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "multiprocess" +version = "0.70.14" +description = "better multiprocessing and multithreading in python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:560a27540daef4ce8b24ed3cc2496a3c670df66c96d02461a4da67473685adf3"}, + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:bfbbfa36f400b81d1978c940616bc77776424e5e34cb0c94974b178d727cfcd5"}, + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:89fed99553a04ec4f9067031f83a886d7fdec5952005551a896a4b6a59575bb9"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:40a5e3685462079e5fdee7c6789e3ef270595e1755199f0d50685e72523e1d2a"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:44936b2978d3f2648727b3eaeab6d7fa0bedf072dc5207bf35a96d5ee7c004cf"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = 
"sha256:e628503187b5d494bf29ffc52d3e1e57bb770ce7ce05d67c4bbdb3a0c7d3b05f"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0d5da0fc84aacb0e4bd69c41b31edbf71b39fe2fb32a54eaedcaea241050855c"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:6a7b03a5b98e911a7785b9116805bd782815c5e2bd6c91c6a320f26fd3e7b7ad"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cea5bdedd10aace3c660fedeac8b087136b4366d4ee49a30f1ebf7409bce00ae"}, + {file = "multiprocess-0.70.14-py310-none-any.whl", hash = "sha256:7dc1f2f6a1d34894c8a9a013fbc807971e336e7cc3f3ff233e61b9dc679b3b5c"}, + {file = "multiprocess-0.70.14-py37-none-any.whl", hash = "sha256:93a8208ca0926d05cdbb5b9250a604c401bed677579e96c14da3090beb798193"}, + {file = "multiprocess-0.70.14-py38-none-any.whl", hash = "sha256:6725bc79666bbd29a73ca148a0fb5f4ea22eed4a8f22fce58296492a02d18a7b"}, + {file = "multiprocess-0.70.14-py39-none-any.whl", hash = "sha256:63cee628b74a2c0631ef15da5534c8aedbc10c38910b9c8b18dcd327528d1ec7"}, + {file = "multiprocess-0.70.14.tar.gz", hash = "sha256:3eddafc12f2260d27ae03fe6069b12570ab4764ab59a75e81624fac453fbf46a"}, +] + +[package.dependencies] +dill = ">=0.3.6" + +[[package]] +name = "murmurhash" +version = "1.0.9" +description = "Cython bindings for MurmurHash" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "murmurhash-1.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:697ed01454d92681c7ae26eb1adcdc654b54062bcc59db38ed03cad71b23d449"}, + {file = "murmurhash-1.0.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ef31b5c11be2c064dbbdd0e22ab3effa9ceb5b11ae735295c717c120087dd94"}, + {file = "murmurhash-1.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7a2bd203377a31bbb2d83fe3f968756d6c9bbfa36c64c6ebfc3c6494fc680bc"}, + {file = "murmurhash-1.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eb0f8e652431ea238c11bcb671fef5c03aff0544bf7e098df81ea4b6d495405"}, + {file = "murmurhash-1.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:cf0b3fe54dca598f5b18c9951e70812e070ecb4c0672ad2cc32efde8a33b3df6"}, + {file = "murmurhash-1.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5dc41be79ba4d09aab7e9110a8a4d4b37b184b63767b1b247411667cdb1057a3"}, + {file = "murmurhash-1.0.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c0f84ecdf37c06eda0222f2f9e81c0974e1a7659c35b755ab2fdc642ebd366db"}, + {file = "murmurhash-1.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:241693c1c819148eac29d7882739b1099c891f1f7431127b2652c23f81722cec"}, + {file = "murmurhash-1.0.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f5ca56c430230d3b581dfdbc54eb3ad8b0406dcc9afdd978da2e662c71d370"}, + {file = "murmurhash-1.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:660ae41fc6609abc05130543011a45b33ca5d8318ae5c70e66bbd351ca936063"}, + {file = "murmurhash-1.0.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01137d688a6b259bde642513506b062364ea4e1609f886d9bd095c3ae6da0b94"}, + {file = "murmurhash-1.0.9-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b70bbf55d89713873a35bd4002bc231d38e530e1051d57ca5d15f96c01fd778"}, + {file = "murmurhash-1.0.9-cp36-cp36m-win_amd64.whl", hash = 
"sha256:3e802fa5b0e618ee99e8c114ce99fc91677f14e9de6e18b945d91323a93c84e8"}, + {file = "murmurhash-1.0.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:213d0248e586082e1cab6157d9945b846fd2b6be34357ad5ea0d03a1931d82ba"}, + {file = "murmurhash-1.0.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94b89d02aeab5e6bad5056f9d08df03ac7cfe06e61ff4b6340feb227fda80ce8"}, + {file = "murmurhash-1.0.9-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c2e2ee2d91a87952fe0f80212e86119aa1fd7681f03e6c99b279e50790dc2b3"}, + {file = "murmurhash-1.0.9-cp37-cp37m-win_amd64.whl", hash = "sha256:8c3d69fb649c77c74a55624ebf7a0df3c81629e6ea6e80048134f015da57b2ea"}, + {file = "murmurhash-1.0.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ab78675510f83e7a3c6bd0abdc448a9a2b0b385b0d7ee766cbbfc5cc278a3042"}, + {file = "murmurhash-1.0.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0ac5530c250d2b0073ed058555847c8d88d2d00229e483d45658c13b32398523"}, + {file = "murmurhash-1.0.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69157e8fa6b25c4383645227069f6a1f8738d32ed2a83558961019ca3ebef56a"}, + {file = "murmurhash-1.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2aebe2ae016525a662ff772b72a2c9244a673e3215fcd49897f494258b96f3e7"}, + {file = "murmurhash-1.0.9-cp38-cp38-win_amd64.whl", hash = "sha256:a5952f9c18a717fa17579e27f57bfa619299546011a8378a8f73e14eece332f6"}, + {file = "murmurhash-1.0.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef79202feeac68e83971239169a05fa6514ecc2815ce04c8302076d267870f6e"}, + {file = "murmurhash-1.0.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799fcbca5693ad6a40f565ae6b8e9718e5875a63deddf343825c0f31c32348fa"}, + {file = "murmurhash-1.0.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9b995bc82eaf9223e045210207b8878fdfe099a788dd8abd708d9ee58459a9d"}, + {file = "murmurhash-1.0.9-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b129e1c5ebd772e6ff5ef925bcce695df13169bd885337e6074b923ab6edcfc8"}, + {file = "murmurhash-1.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:379bf6b414bd27dd36772dd1570565a7d69918e980457370838bd514df0d91e9"}, + {file = "murmurhash-1.0.9.tar.gz", hash = "sha256:fe7a38cb0d3d87c14ec9dddc4932ffe2dbc77d75469ab80fd5014689b0e07b58"}, +] + +[[package]] +name = "mypy" +version = "0.991" +description = "Optional static typing for Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, + {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, + {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, + {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, + {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, + {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, + {file = 
"mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, + {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, + {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, + {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, + {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, + {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, + {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, + {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, + {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, + {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, + {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, + {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, + {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, + {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, + {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, + {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, + {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, + {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, + {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = 
">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "myst-nb" +version = "0.17.1" +description = "A Jupyter Notebook Sphinx reader built on top of the MyST markdown parser." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "myst-nb-0.17.1.tar.gz", hash = "sha256:14df725f3e00cb5efef4f863bf0c273490c8c662dfee39ed8a7b374bf2561933"}, + {file = "myst_nb-0.17.1-py3-none-any.whl", hash = "sha256:c268d11aa4936b4bdd18b3b2cd5baa14fdb80c80d2983c02329ade52010f6260"}, +] + +[package.dependencies] +importlib_metadata = "*" +ipykernel = "*" +ipython = "*" +jupyter-cache = ">=0.5.0,<0.6.0" +myst-parser = ">=0.18.0,<0.19.0" +nbclient = "*" +nbformat = ">=5.0,<6.0" +pyyaml = "*" +sphinx = ">=4,<6" +typing-extensions = "*" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["alabaster", "altair", "bokeh", "coconut (>=1.4.3,<1.5.0)", "ipykernel (>=5.5,<6.0)", "ipywidgets", "jupytext (>=1.11.2,<1.12.0)", "matplotlib", "numpy", "pandas", "plotly", "sphinx-book-theme (>=0.3.0,<0.4.0)", "sphinx-copybutton", "sphinx-design (>=0.1.0,<0.2.0)", "sphinxcontrib-bibtex", "sympy"] +testing = ["beautifulsoup4", "coverage (>=6.4,<7.0)", "ipykernel (>=5.5,<6.0)", "ipython (!=8.1.0,<8.5)", "ipywidgets (>=8)", "jupytext (>=1.11.2,<1.12.0)", "matplotlib (>=3.5.3,<3.6)", "nbdime", "numpy", "pandas", "pytest (>=7.1,<8.0)", "pytest-cov (>=3.0,<4.0)", "pytest-param-files (>=0.3.3,<0.4.0)", "pytest-regressions", "sympy (>=1.10.1)"] + +[[package]] +name = "myst-parser" +version = "0.18.1" +description = "An extended commonmark compliant parser, with bridges to docutils & sphinx." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "myst-parser-0.18.1.tar.gz", hash = "sha256:79317f4bb2c13053dd6e64f9da1ba1da6cd9c40c8a430c447a7b146a594c246d"}, + {file = "myst_parser-0.18.1-py3-none-any.whl", hash = "sha256:61b275b85d9f58aa327f370913ae1bec26ebad372cc99f3ab85c8ec3ee8d9fb8"}, +] + +[package.dependencies] +docutils = ">=0.15,<0.20" +jinja2 = "*" +markdown-it-py = ">=1.0.0,<3.0.0" +mdit-py-plugins = ">=0.3.1,<0.4.0" +pyyaml = "*" +sphinx = ">=4,<6" +typing-extensions = "*" + +[package.extras] +code-style = ["pre-commit (>=2.12,<3.0)"] +linkify = ["linkify-it-py (>=1.0,<2.0)"] +rtd = ["ipython", "sphinx-book-theme", "sphinx-design", "sphinxcontrib.mermaid (>=0.7.1,<0.8.0)", "sphinxext-opengraph (>=0.6.3,<0.7.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=6,<7)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx (<5.2)", "sphinx-pytest"] + +[[package]] +name = "nbclassic" +version = "0.5.3" +description = "Jupyter Notebook as a Jupyter Server extension." 
+category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nbclassic-0.5.3-py3-none-any.whl", hash = "sha256:e849277872d9ffd8fe4b39a8038d01ba82d6a1def9ce11b1b3c26c9546ed5131"}, + {file = "nbclassic-0.5.3.tar.gz", hash = "sha256:889772a7ba524eb781d2901f396540bcad41151e1f7e043f12ebc14a6540d342"}, +] + +[package.dependencies] +argon2-cffi = "*" +ipykernel = "*" +ipython-genutils = "*" +jinja2 = "*" +jupyter-client = ">=6.1.1" +jupyter-core = ">=4.6.1" +jupyter-server = ">=1.8" +nbconvert = ">=5" +nbformat = "*" +nest-asyncio = ">=1.5" +notebook-shim = ">=0.1.0" +prometheus-client = "*" +pyzmq = ">=17" +Send2Trash = ">=1.8.0" +terminado = ">=0.8.3" +tornado = ">=6.1" +traitlets = ">=4.2.1" + +[package.extras] +docs = ["myst-parser", "nbsphinx", "sphinx", "sphinx-rtd-theme", "sphinxcontrib-github-alt"] +json-logging = ["json-logging"] +test = ["coverage", "nbval", "pytest", "pytest-cov", "pytest-jupyter", "pytest-playwright", "pytest-tornasync", "requests", "requests-unixsocket", "testpath"] + +[[package]] +name = "nbclient" +version = "0.5.13" +description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." +category = "dev" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "nbclient-0.5.13-py3-none-any.whl", hash = "sha256:47ac905af59379913c1f8f541098d2550153cf8dc58553cbe18c702b181518b0"}, + {file = "nbclient-0.5.13.tar.gz", hash = "sha256:40c52c9b5e3c31faecaee69f202b3f53e38d7c1c563de0fadde9d7eda0fdafe8"}, +] + +[package.dependencies] +jupyter-client = ">=6.1.5" +nbformat = ">=5.0" +nest-asyncio = "*" +traitlets = ">=5.0.0" + +[package.extras] +sphinx = ["Sphinx (>=1.7)", "mock", "moto", "myst-parser", "sphinx-book-theme"] +test = ["black", "check-manifest", "flake8", "ipykernel", "ipython (<8.0.0)", "ipywidgets (<8.0.0)", "mypy", "pip (>=18.1)", "pytest (>=4.1)", "pytest-asyncio", "pytest-cov (>=2.6.1)", "setuptools (>=38.6.0)", "twine (>=1.11.0)", "wheel (>=0.31.0)", "xmltodict"] + +[[package]] +name = "nbconvert" +version = "7.2.10" +description = "Converting Jupyter Notebooks" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nbconvert-7.2.10-py3-none-any.whl", hash = "sha256:e41118f81698d3d59b3c7c2887937446048f741aba6c367c1c1a77810b3e2d08"}, + {file = "nbconvert-7.2.10.tar.gz", hash = "sha256:8eed67bd8314f3ec87c4351c2f674af3a04e5890ab905d6bd927c05aec1cf27d"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +bleach = "*" +defusedxml = "*" +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +jinja2 = ">=3.0" +jupyter-core = ">=4.7" +jupyterlab-pygments = "*" +markupsafe = ">=2.0" +mistune = ">=2.0.3,<3" +nbclient = ">=0.5.0" +nbformat = ">=5.1" +packaging = "*" +pandocfilters = ">=1.4.1" +pygments = ">=2.4.1" +tinycss2 = "*" +traitlets = ">=5.0" + +[package.extras] +all = ["nbconvert[docs,qtpdf,serve,test,webpdf]"] +docs = ["ipykernel", "ipython", "myst-parser", "nbsphinx (>=0.2.12)", "pydata-sphinx-theme", "sphinx (==5.0.2)", "sphinxcontrib-spelling"] +qtpdf = ["nbconvert[qtpng]"] +qtpng = ["pyqtwebengine (>=5.15)"] +serve = ["tornado (>=6.1)"] +test = ["ipykernel", "ipywidgets (>=7)", "pre-commit", "pytest", "pytest-dependency"] +webpdf = ["pyppeteer (>=1,<1.1)"] + +[[package]] +name = "nbformat" +version = "5.8.0" +description = "The Jupyter Notebook format" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nbformat-5.8.0-py3-none-any.whl", hash = 
"sha256:d910082bd3e0bffcf07eabf3683ed7dda0727a326c446eeb2922abe102e65162"}, + {file = "nbformat-5.8.0.tar.gz", hash = "sha256:46dac64c781f1c34dfd8acba16547024110348f9fc7eab0f31981c2a3dc48d1f"}, +] + +[package.dependencies] +fastjsonschema = "*" +jsonschema = ">=2.6" +jupyter-core = "*" +traitlets = ">=5.1" + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] +test = ["pep440", "pre-commit", "pytest", "testpath"] + +[[package]] +name = "nbsphinx" +version = "0.8.12" +description = "Jupyter Notebook Tools for Sphinx" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "nbsphinx-0.8.12-py3-none-any.whl", hash = "sha256:c15b681c7fce287000856f91fe1edac50d29f7b0c15bbc746fbe55c8eb84750b"}, + {file = "nbsphinx-0.8.12.tar.gz", hash = "sha256:76570416cdecbeb21dbf5c3d6aa204ced6c1dd7ebef4077b5c21b8c6ece9533f"}, +] + +[package.dependencies] +docutils = "*" +jinja2 = "*" +nbconvert = "!=5.4" +nbformat = "*" +sphinx = ">=1.8" +traitlets = ">=5" + +[[package]] +name = "nest-asyncio" +version = "1.5.6" +description = "Patch asyncio to allow nested event loops" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.5.6-py3-none-any.whl", hash = "sha256:b9a953fb40dceaa587d109609098db21900182b16440652454a146cffb06e8b8"}, + {file = "nest_asyncio-1.5.6.tar.gz", hash = "sha256:d267cc1ff794403f7df692964d1d2a3fa9418ffea2a3f6859a439ff482fef290"}, +] + +[[package]] +name = "networkx" +version = "2.8.8" +description = "Python package for creating and manipulating graphs and networks" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"}, + {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=0.982)", "pre-commit (>=2.20)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.2)", "pydata-sphinx-theme (>=0.11)", "sphinx (>=5.2)", "sphinx-gallery (>=0.11)", "texext (>=0.6.6)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nlpcloud" +version = "1.0.40" +description = "Python client for the NLP Cloud API" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "nlpcloud-1.0.40-py3-none-any.whl", hash = "sha256:9efc67dadbf64015330035d8772aff144da3c24701ddef6173b1da3a1b31d407"}, + {file = "nlpcloud-1.0.40.tar.gz", hash = "sha256:f11166782a706431a50e44343f6eb1aa8bac612be08f73e04ad2313d970e86b8"}, +] + +[package.dependencies] +requests = "*" + +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] 
+machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + +[[package]] +name = "nomic" +version = "1.1.0" +description = "The offical Nomic python client." +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "nomic-1.1.0.tar.gz", hash = "sha256:dfa14a342e4e0a02651ea277c5f4d4dc22bf6aeb1bcbf0d4428a460f01d0084c"}, +] + +[package.dependencies] +click = "*" +cohere = "*" +jsonlines = "*" +loguru = "*" +numpy = "*" +pyarrow = "*" +pydantic = "*" +requests = "*" +rich = "*" +tqdm = "*" +wonderwords = "*" + +[package.extras] +dev = ["black", "cairosvg", "coverage", "mkautodoc", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "myst-parser", "pillow", "pylint", "pytest", "twine"] + +[[package]] +name = "notebook" +version = "6.5.3" +description = "A web-based notebook environment for interactive computing" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "notebook-6.5.3-py3-none-any.whl", hash = "sha256:50a334ad9d60b30cb759405168ef6fc3d60350ab5439fb1631544bb09dcb2cce"}, + {file = "notebook-6.5.3.tar.gz", hash = "sha256:b12bee3292211d85dd7e588a790ddce30cb3e8fbcfa1e803522a207f60819e05"}, +] + +[package.dependencies] +argon2-cffi = "*" +ipykernel = "*" +ipython-genutils = "*" +jinja2 = "*" +jupyter-client = ">=5.3.4" +jupyter-core = ">=4.6.1" +nbclassic = ">=0.4.7" +nbconvert = ">=5" +nbformat = "*" +nest-asyncio = ">=1.5" +prometheus-client = "*" +pyzmq = ">=17" +Send2Trash = ">=1.8.0" +terminado = ">=0.8.3" +tornado = ">=6.1" +traitlets = ">=4.2.1" + +[package.extras] +docs = ["myst-parser", "nbsphinx", "sphinx", "sphinx-rtd-theme", "sphinxcontrib-github-alt"] +json-logging = ["json-logging"] +test = ["coverage", "nbval", "pytest", "pytest-cov", "requests", "requests-unixsocket", "selenium (==4.1.5)", "testpath"] + +[[package]] +name = "notebook-shim" +version = "0.2.2" +description = "A shim layer for notebook traits and config" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "notebook_shim-0.2.2-py3-none-any.whl", hash = "sha256:9c6c30f74c4fbea6fce55c1be58e7fd0409b1c681b075dcedceb005db5026949"}, + {file = "notebook_shim-0.2.2.tar.gz", hash = "sha256:090e0baf9a5582ff59b607af523ca2db68ff216da0c69956b62cab2ef4fc9c3f"}, +] + +[package.dependencies] +jupyter-server = ">=1.8,<3" + +[package.extras] +test = ["pytest", "pytest-console-scripts", "pytest-tornasync"] + +[[package]] +name = "numcodecs" +version = "0.11.0" +description = "A Python package providing buffer compression and transformation codecs for use" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "numcodecs-0.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bc116752be45b4f9dca4315e5a2b4185e3b46f68c997dbb84aef334ceb5a1d"}, + {file = "numcodecs-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c27dfca402f69fbfa01c46fb572086e77f38121192160cc8ed1177dc30702c52"}, + {file = "numcodecs-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:0fabc7dfdf64a9555bf8a34911e05b415793c67a1377207dc79cd96342291fa1"}, + {file = "numcodecs-0.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dae3f5678f247336c84e7315a0c59a4fec7c33eb7db72d78ff5c776479a812e"}, + {file = "numcodecs-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697785b786bb0039d3feeaabdc10f25eda6c149700cde954653aaa47637832"}, + {file = "numcodecs-0.11.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:8c2f36b21162c6ebccc05d3fe896f86b91dcf8709946809f730cc23a37f8234d"}, + {file = "numcodecs-0.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c240858bf29e0ff254b1db60430e8b2658b8c8328b684f80033289d94807a7c"}, + {file = "numcodecs-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee5bda16e9d26a7a39fc20b6c1cec23b4debc314df5cfae3ed505149c2eeafc4"}, + {file = "numcodecs-0.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:bd05cdb853c7bcfde2efc809a9df2c5e205b96f70405b810e5788b45d0d81f73"}, + {file = "numcodecs-0.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:694dc2e80b1f169b7deb14bdd0a04b20e5f17ef32cb0f81b71ab690406ec6bd9"}, + {file = "numcodecs-0.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf3925eeb37aed0e6c04d7fb9614133a3c8426dc77f8bda54c99c601a44b3bd3"}, + {file = "numcodecs-0.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:11596b71267417425ea8afb407477a67d684f434c8b07b1dd59c25a97d5c3ccb"}, + {file = "numcodecs-0.11.0.tar.gz", hash = "sha256:6c058b321de84a1729299b0eae4d652b2e48ea1ca7f9df0da65cb13470e635eb"}, +] + +[package.dependencies] +entrypoints = "*" +numpy = ">=1.7" + +[package.extras] +docs = ["mock", "numpydoc", "sphinx", "sphinx-issues"] +msgpack = ["msgpack"] +test = ["coverage", "flake8", "pytest", "pytest-cov"] +zfpy = ["zfpy (>=1.0.0)"] + +[[package]] +name = "numpy" +version = "1.24.2" +description = "Fundamental package for array computing in Python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"}, + {file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"}, + {file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"}, + {file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"}, + {file = "numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"}, + {file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"}, + {file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"}, + {file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"}, + {file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"}, + {file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"}, + {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"}, +] + +[[package]] +name = "nvidia-cublas-cu11" +version = "11.10.3.66" +description = "CUBLAS native runtime libraries" +category = "main" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"}, + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-win_amd64.whl", hash = "sha256:8ac17ba6ade3ed56ab898a036f9ae0756f1e81052a317bf98f8c6d18dc3ae49e"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-nvrtc-cu11" +version = "11.7.99" +description = "NVRTC native runtime libraries" +category = "main" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:9f1562822ea264b7e34ed5930567e89242d266448e936b85bc97a3370feabb03"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:f7d9610d9b7c331fa0da2d1b2858a4a8315e6d49765091d28711c8946e7425e7"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:f2effeb1309bdd1b3854fc9b17eaf997808f8b25968ce0c7070945c4265d64a3"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-runtime-cu11" +version = "11.7.99" +description = "CUDA Runtime native Libraries" +category = "main" +optional = 
true +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:cc768314ae58d2641f07eac350f40f99dcb35719c4faff4bc458a7cd2b119e31"}, + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:bc77fa59a7679310df9d5c70ab13c4e34c64ae2124dd1efd7e5474b71be125c7"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cudnn-cu11" +version = "8.5.0.96" +description = "cuDNN runtime libraries" +category = "main" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"}, + {file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "openai" +version = "0.27.2" +description = "Python client library for the OpenAI API" +category = "main" +optional = true +python-versions = ">=3.7.1" +files = [ + {file = "openai-0.27.2-py3-none-any.whl", hash = "sha256:6df674cf257e9e0504f1fd191c333d3f6a2442b13218d0eccf06230eb24d320e"}, + {file = "openai-0.27.2.tar.gz", hash = "sha256:5869fdfa34b0ec66c39afa22f4a0fb83a135dff81f6505f52834c6ab3113f762"}, +] + +[package.dependencies] +aiohttp = "*" +requests = ">=2.20" +tqdm = "*" + +[package.extras] +datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] +embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] +wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] + +[[package]] +name = "opensearch-py" +version = "2.2.0" +description = "Python client for OpenSearch" +category = "main" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" +files = [ + {file = "opensearch-py-2.2.0.tar.gz", hash = "sha256:109fe8d2e1e8f419a22358eb901025f51e6ad2f50014c8962e23796b2a23cb67"}, + {file = "opensearch_py-2.2.0-py2.py3-none-any.whl", hash = "sha256:595dcebe42e21cdf945add0b5dbaecccace1a8a5ba65d60314813767b564263c"}, +] + +[package.dependencies] +certifi = ">=2022.12.07" +python-dateutil = "*" +requests = ">=2.4.0,<3.0.0" +six = "*" +urllib3 = ">=1.21.1,<2" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +develop = ["black", "botocore", "coverage (<7.0.0)", "jinja2", "mock", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +docs = ["myst-parser", "sphinx", "sphinx-copybutton", 
"sphinx-rtd-theme"] +kerberos = ["requests-kerberos"] + +[[package]] +name = "opt-einsum" +version = "3.3.0" +description = "Optimizing numpys einsum function" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147"}, + {file = "opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549"}, +] + +[package.dependencies] +numpy = ">=1.7" + +[package.extras] +docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] +tests = ["pytest", "pytest-cov", "pytest-pep8"] + +[[package]] +name = "packaging" +version = "23.0" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, +] + +[[package]] +name = "pandocfilters" +version = "1.5.0" +description = "Utilities for writing pandoc filters in python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pandocfilters-1.5.0-py2.py3-none-any.whl", hash = "sha256:33aae3f25fd1a026079f5d27bdd52496f0e0803b3469282162bafdcbdf6ef14f"}, + {file = "pandocfilters-1.5.0.tar.gz", hash = "sha256:0b679503337d233b4339a817bfc8c50064e2eff681314376a47cb582305a7a38"}, +] + +[[package]] +name = "parso" +version = "0.8.3" +description = "A Python Parser" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, + {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, +] + +[package.extras] +qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] +testing = ["docopt", "pytest (<6.0.0)"] + +[[package]] +name = "pathos" +version = "0.3.0" +description = "parallel graph management and execution in heterogeneous computing" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "pathos-0.3.0-py3-none-any.whl", hash = "sha256:b1f5a79b1c79a594330d451832642ee5bb61dd77dc75ba9e5c72087c77e8994c"}, + {file = "pathos-0.3.0.tar.gz", hash = "sha256:24fa8db51fbd9284da8e191794097c4bb2aa3fce411090e57af6385e61b97e09"}, +] + +[package.dependencies] +dill = ">=0.3.6" +multiprocess = ">=0.70.14" +pox = ">=0.3.2" +ppft = ">=1.7.6.6" + +[[package]] +name = "pathspec" +version = "0.11.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"}, + {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"}, +] + +[[package]] +name = "pathy" +version = "0.10.1" +description = "pathlib.Path subclasses for local and cloud bucket storage" +category = "main" +optional = true +python-versions = ">= 3.6" +files = [ + {file = "pathy-0.10.1-py3-none-any.whl", hash = "sha256:a7613ee2d99a0a3300e1d836322e2d947c85449fde59f52906f995dbff67dad4"}, + {file = "pathy-0.10.1.tar.gz", hash = "sha256:4cd6e71b4cd5ff875cfbb949ad9fa5519d8d1dbe69d5fc1d1b23aa3cb049618b"}, +] + +[package.dependencies] +smart-open = ">=5.2.1,<7.0.0" +typer = ">=0.3.0,<1.0.0" + +[package.extras] +all = ["azure-storage-blob", "boto3", "google-cloud-storage (>=1.26.0,<2.0.0)", "mock", "pytest", "pytest-coverage", "typer-cli"] +azure = ["azure-storage-blob"] +gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"] +s3 = ["boto3"] +test = ["mock", "pytest", "pytest-coverage", "typer-cli"] + +[[package]] +name = "pexpect" +version = "4.8.0" +description = "Pexpect allows easy control of interactive console applications." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] + +[package.dependencies] +ptyprocess = ">=0.5" + +[[package]] +name = "pgvector" +version = "0.1.6" +description = "pgvector support for Python" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "pgvector-0.1.6-py2.py3-none-any.whl", hash = "sha256:c53d49dae7c5e0e39bc2f05ce8599a853383f11ce9ffaa7bd0924844e16c7bf4"}, +] + +[package.dependencies] +numpy = "*" + +[[package]] +name = "pickleshare" +version = "0.7.5" +description = "Tiny 'shelve'-like database with concurrency support" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] + +[[package]] +name = "pillow" +version = "9.4.0" +description = "Python Imaging Library (Fork)" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "Pillow-9.4.0-1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1"}, + {file = "Pillow-9.4.0-1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12"}, + {file = "Pillow-9.4.0-1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd"}, + {file = "Pillow-9.4.0-1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9"}, + {file = "Pillow-9.4.0-1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858"}, + {file = "Pillow-9.4.0-1-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab"}, + {file = 
"Pillow-9.4.0-1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9"}, + {file = "Pillow-9.4.0-2-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0"}, + {file = "Pillow-9.4.0-2-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f"}, + {file = "Pillow-9.4.0-2-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c"}, + {file = "Pillow-9.4.0-2-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848"}, + {file = "Pillow-9.4.0-2-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1"}, + {file = "Pillow-9.4.0-2-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33"}, + {file = "Pillow-9.4.0-2-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35"}, + {file = "Pillow-9.4.0-cp310-cp310-win32.whl", hash = "sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a"}, + {file = "Pillow-9.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a"}, + {file = "Pillow-9.4.0-cp311-cp311-win32.whl", hash = "sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c"}, + {file = "Pillow-9.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee"}, + {file = "Pillow-9.4.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5"}, + {file = "Pillow-9.4.0-cp37-cp37m-win32.whl", hash = "sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e"}, + {file = "Pillow-9.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628"}, + {file = "Pillow-9.4.0-cp38-cp38-win32.whl", hash = 
"sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d"}, + {file = "Pillow-9.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153"}, + {file = "Pillow-9.4.0-cp39-cp39-win32.whl", hash = "sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c"}, + {file = "Pillow-9.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9"}, + {file = "Pillow-9.4.0.tar.gz", hash = "sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e"}, +] + +[package.extras] +docs = ["furo", 
"olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "pinecone-client" +version = "2.2.1" +description = "Pinecone client and SDK" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "pinecone-client-2.2.1.tar.gz", hash = "sha256:0878dcaee447c46c8d1b3d71c854689daa7e548e5009a171780907c7d4e74789"}, + {file = "pinecone_client-2.2.1-py3-none-any.whl", hash = "sha256:6976a22aee57a9813378607506c8c36b0317dfa36a08a5397aaaeab2eef66c1b"}, +] + +[package.dependencies] +dnspython = ">=2.0.0" +loguru = ">=0.5.0" +numpy = "*" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4" +requests = ">=2.19.0" +tqdm = ">=4.64.1" +typing-extensions = ">=3.7.4" +urllib3 = ">=1.21.1" + +[package.extras] +grpc = ["googleapis-common-protos (>=1.53.0)", "grpc-gateway-protoc-gen-openapiv2 (==0.1.0)", "grpcio (>=1.44.0)", "lz4 (>=3.1.3)", "protobuf (==3.19.3)"] + +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] + +[[package]] +name = "platformdirs" +version = "3.2.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.2.0-py3-none-any.whl", hash = "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"}, + {file = "platformdirs-3.2.0.tar.gz", hash = "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08"}, +] + +[package.extras] +docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] + +[[package]] +name = "playwright" +version = "1.31.1" +description = "A high-level API to automate web browsers" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "playwright-1.31.1-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:225ff39ff29219caab242a562e6a5efa486d4659ac10d16421ad5904c7f23b25"}, + {file = "playwright-1.31.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c51029b8837f3533277718ff003a6fc1b512e7879ef4880306c69048345d04ac"}, + {file = "playwright-1.31.1-py3-none-macosx_11_0_universal2.whl", hash = "sha256:d74ee6b7de96ddaf3af91a90d6160beda68b281b1027b8afec46945062c25aec"}, + {file = "playwright-1.31.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:12c73e6a10ba8a1ddac4849a6f06c753e38de60bec6db5f50f7580ade5772d97"}, + {file = "playwright-1.31.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f032c87b918a0b348b15065d5a67693cd70f39861ea65b8c9f0b63c991bf1a75"}, + {file = "playwright-1.31.1-py3-none-win32.whl", hash = "sha256:5a3eefb7bbe5fb90c89b31ac52a6d78dde94cf82d5fbe0a999789a37d414a92f"}, + {file = "playwright-1.31.1-py3-none-win_amd64.whl", hash = 
"sha256:5cb681c1bfc05e49dfe5299b9bf2f51300e60ed077d47c927c8f056938972565"}, +] + +[package.dependencies] +greenlet = "2.0.1" +pyee = "9.0.4" +typing-extensions = {version = "*", markers = "python_version <= \"3.8\""} + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pox" +version = "0.3.2" +description = "utilities for filesystem exploration and automated builds" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "pox-0.3.2-py3-none-any.whl", hash = "sha256:56fe2f099ecd8a557b8948082504492de90e8598c34733c9b1fdeca8f7b6de61"}, + {file = "pox-0.3.2.tar.gz", hash = "sha256:e825225297638d6e3d49415f8cfb65407a5d15e56f2fb7fe9d9b9e3050c65ee1"}, +] + +[[package]] +name = "ppft" +version = "1.7.6.6" +description = "distributed and parallel python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "ppft-1.7.6.6-py3-none-any.whl", hash = "sha256:f355d2caeed8bd7c9e4a860c471f31f7e66d1ada2791ab5458ea7dca15a51e41"}, + {file = "ppft-1.7.6.6.tar.gz", hash = "sha256:f933f0404f3e808bc860745acb3b79cd4fe31ea19a20889a645f900415be60f1"}, +] + +[package.extras] +dill = ["dill (>=0.3.6)"] + +[[package]] +name = "preshed" +version = "3.0.8" +description = "Cython hash table that trusts the keys are pre-hashed" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "preshed-3.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ea4b6df8ef7af38e864235256793bc3056e9699d991afcf6256fa298858582fc"}, + {file = "preshed-3.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e945fc814bdc29564a2ce137c237b3a9848aa1e76a1160369b6e0d328151fdd"}, + {file = "preshed-3.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9a4833530fe53001c351974e0c8bb660211b8d0358e592af185fec1ae12b2d0"}, + {file = "preshed-3.0.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1472ee231f323b4f4368b1b5f8f08481ed43af89697d45450c6ae4af46ac08a"}, + {file = "preshed-3.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:c8a2e2931eea7e500fbf8e014b69022f3fab2e35a70da882e2fc753e5e487ae3"}, + {file = "preshed-3.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e1bb8701df7861af26a312225bdf7c4822ac06fcf75aeb60fe2b0a20e64c222"}, + {file = "preshed-3.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e9aef2b0b7687aecef48b1c6ff657d407ff24e75462877dcb888fa904c4a9c6d"}, + {file = "preshed-3.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:854d58a8913ebf3b193b0dc8064155b034e8987de25f26838dfeca09151fda8a"}, + {file = "preshed-3.0.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:135e2ac0db1a3948d6ec295598c7e182b52c394663f2fcfe36a97ae51186be21"}, + {file = "preshed-3.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:019d8fa4161035811fb2804d03214143298739e162d0ad24e087bd46c50970f5"}, + {file = "preshed-3.0.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6a49ce52856fbb3ef4f1cc744c53f5d7e1ca370b1939620ac2509a6d25e02a50"}, + {file = "preshed-3.0.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdbc2957b36115a576c515ffe963919f19d2683f3c76c9304ae88ef59f6b5ca6"}, + {file = "preshed-3.0.8-cp36-cp36m-win_amd64.whl", hash = "sha256:09cc9da2ac1b23010ce7d88a5e20f1033595e6dd80be14318e43b9409f4c7697"}, + {file = "preshed-3.0.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e19c8069f1a1450f835f23d47724530cf716d581fcafb398f534d044f806b8c2"}, + {file = "preshed-3.0.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25b5ef5e387a0e17ff41202a8c1816184ab6fb3c0d0b847bf8add0ed5941eb8d"}, + {file = "preshed-3.0.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53d3e2456a085425c66af7baba62d7eaa24aa5e460e1a9e02c401a2ed59abd7b"}, + {file = "preshed-3.0.8-cp37-cp37m-win_amd64.whl", hash = "sha256:85e98a618fb36cdcc37501d8b9b8c1246651cc2f2db3a70702832523e0ae12f4"}, + {file = "preshed-3.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f8837bf616335464f3713cbf562a3dcaad22c3ca9193f957018964ef871a68b"}, + {file = "preshed-3.0.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:720593baf2c2e295f855192974799e486da5f50d4548db93c44f5726a43cefb9"}, + {file = "preshed-3.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0ad3d860b9ce88a74cf7414bb4b1c6fd833813e7b818e76f49272c4974b19ce"}, + {file = "preshed-3.0.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd19d48440b152657966a52e627780c0ddbe9d907b8d7ee4598505e80a3c55c7"}, + {file = "preshed-3.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:246e7c6890dc7fe9b10f0e31de3346b906e3862b6ef42fcbede37968f46a73bf"}, + {file = "preshed-3.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67643e66691770dc3434b01671648f481e3455209ce953727ef2330b16790aaa"}, + {file = "preshed-3.0.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ae25a010c9f551aa2247ee621457f679e07c57fc99d3fd44f84cb40b925f12c"}, + {file = "preshed-3.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6a7fcf7dd2e7711051b3f0432da9ec9c748954c989f49d2cd8eabf8c2d953e"}, + {file = "preshed-3.0.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5942858170c4f53d9afc6352a86bbc72fc96cc4d8964b6415492114a5920d3ed"}, + {file = "preshed-3.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:06793022a56782ef51d74f1399925a2ba958e50c5cfbc6fa5b25c4945e158a07"}, + {file = "preshed-3.0.8.tar.gz", hash = "sha256:6c74c70078809bfddda17be96483c41d06d717934b07cab7921011d81758b357"}, +] + +[package.dependencies] +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=0.28.0,<1.1.0" + +[[package]] +name = "prometheus-client" +version = "0.16.0" +description = "Python client for the Prometheus monitoring system." 
+category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "prometheus_client-0.16.0-py3-none-any.whl", hash = "sha256:0836af6eb2c8f4fed712b2f279f6c0a8bbab29f9f4aa15276b91c7cb0d1616ab"}, + {file = "prometheus_client-0.16.0.tar.gz", hash = "sha256:a03e35b359f14dd1630898543e2120addfdeacd1a6069c1367ae90fd93ad3f48"}, +] + +[package.extras] +twisted = ["twisted"] + +[[package]] +name = "prompt-toolkit" +version = "3.0.38" +description = "Library for building powerful interactive command lines in Python" +category = "dev" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "prompt_toolkit-3.0.38-py3-none-any.whl", hash = "sha256:45ea77a2f7c60418850331366c81cf6b5b9cf4c7fd34616f733c5427e6abbb1f"}, + {file = "prompt_toolkit-3.0.38.tar.gz", hash = "sha256:23ac5d50538a9a38c8bde05fecb47d0b403ecd0662857a86f886f798563d5b9b"}, +] + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "protobuf" +version = "3.19.6" +description = "Protocol Buffers" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "protobuf-3.19.6-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:010be24d5a44be7b0613750ab40bc8b8cedc796db468eae6c779b395f50d1fa1"}, + {file = "protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11478547958c2dfea921920617eb457bc26867b0d1aa065ab05f35080c5d9eb6"}, + {file = "protobuf-3.19.6-cp310-cp310-win32.whl", hash = "sha256:559670e006e3173308c9254d63facb2c03865818f22204037ab76f7a0ff70b5f"}, + {file = "protobuf-3.19.6-cp310-cp310-win_amd64.whl", hash = "sha256:347b393d4dd06fb93a77620781e11c058b3b0a5289262f094379ada2920a3730"}, + {file = "protobuf-3.19.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a8ce5ae0de28b51dff886fb922012dad885e66176663950cb2344c0439ecb473"}, + {file = "protobuf-3.19.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b0d02163c4e67279ddb6dc25e063db0130fc299aefabb5d481053509fae5c8"}, + {file = "protobuf-3.19.6-cp36-cp36m-win32.whl", hash = "sha256:30f5370d50295b246eaa0296533403961f7e64b03ea12265d6dfce3a391d8992"}, + {file = "protobuf-3.19.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0c0714b025ec057b5a7600cb66ce7c693815f897cfda6d6efb58201c472e3437"}, + {file = "protobuf-3.19.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5057c64052a1f1dd7d4450e9aac25af6bf36cfbfb3a1cd89d16393a036c49157"}, + {file = "protobuf-3.19.6-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bb6776bd18f01ffe9920e78e03a8676530a5d6c5911934c6a1ac6eb78973ecb6"}, + {file = "protobuf-3.19.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84a04134866861b11556a82dd91ea6daf1f4925746b992f277b84013a7cc1229"}, + {file = "protobuf-3.19.6-cp37-cp37m-win32.whl", hash = "sha256:4bc98de3cdccfb5cd769620d5785b92c662b6bfad03a202b83799b6ed3fa1fa7"}, + {file = "protobuf-3.19.6-cp37-cp37m-win_amd64.whl", hash = "sha256:aa3b82ca1f24ab5326dcf4ea00fcbda703e986b22f3d27541654f749564d778b"}, + {file = "protobuf-3.19.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2b2d2913bcda0e0ec9a784d194bc490f5dc3d9d71d322d070b11a0ade32ff6ba"}, + {file = "protobuf-3.19.6-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d0b635cefebd7a8a0f92020562dead912f81f401af7e71f16bf9506ff3bdbb38"}, + {file = "protobuf-3.19.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a552af4dc34793803f4e735aabe97ffc45962dfd3a237bdde242bff5a3de684"}, + {file = "protobuf-3.19.6-cp38-cp38-win32.whl", hash = 
"sha256:0469bc66160180165e4e29de7f445e57a34ab68f49357392c5b2f54c656ab25e"}, + {file = "protobuf-3.19.6-cp38-cp38-win_amd64.whl", hash = "sha256:91d5f1e139ff92c37e0ff07f391101df77e55ebb97f46bbc1535298d72019462"}, + {file = "protobuf-3.19.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c0ccd3f940fe7f3b35a261b1dd1b4fc850c8fde9f74207015431f174be5976b3"}, + {file = "protobuf-3.19.6-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:30a15015d86b9c3b8d6bf78d5b8c7749f2512c29f168ca259c9d7727604d0e39"}, + {file = "protobuf-3.19.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:878b4cd080a21ddda6ac6d1e163403ec6eea2e206cf225982ae04567d39be7b0"}, + {file = "protobuf-3.19.6-cp39-cp39-win32.whl", hash = "sha256:5a0d7539a1b1fb7e76bf5faa0b44b30f812758e989e59c40f77a7dab320e79b9"}, + {file = "protobuf-3.19.6-cp39-cp39-win_amd64.whl", hash = "sha256:bbf5cea5048272e1c60d235c7bd12ce1b14b8a16e76917f371c718bd3005f045"}, + {file = "protobuf-3.19.6-py2.py3-none-any.whl", hash = "sha256:14082457dc02be946f60b15aad35e9f5c69e738f80ebbc0900a19bc83734a5a4"}, + {file = "protobuf-3.19.6.tar.gz", hash = "sha256:5f5540d57a43042389e87661c6eaa50f47c19c6176e8cf1c4f287aeefeccb5c4"}, +] + +[[package]] +name = "psutil" +version = "5.9.4" +description = "Cross-platform lib for process and system monitoring in Python." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"}, + {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"}, + {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"}, + {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"}, + {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"}, + {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"}, + {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"}, + {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"}, +] + +[package.extras] +test = ["enum34", "ipaddress", 
"mock", "pywin32", "wmi"] + +[[package]] +name = "psycopg2-binary" +version = "2.9.5" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "psycopg2-binary-2.9.5.tar.gz", hash = "sha256:33e632d0885b95a8b97165899006c40e9ecdc634a529dca7b991eb7de4ece41c"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-macosx_10_15_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:0775d6252ccb22b15da3b5d7adbbf8cfe284916b14b6dc0ff503a23edb01ee85"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ec46ed947801652c9643e0b1dc334cfb2781232e375ba97312c2fc256597632"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3520d7af1ebc838cc6084a3281145d5cd5bdd43fdef139e6db5af01b92596cb7"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cbc554ba47ecca8cd3396ddaca85e1ecfe3e48dd57dc5e415e59551affe568e"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:5d28ecdf191db558d0c07d0f16524ee9d67896edf2b7990eea800abeb23ebd61"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-manylinux_2_24_ppc64le.whl", hash = "sha256:b9c33d4aef08dfecbd1736ceab8b7b3c4358bf10a0121483e5cd60d3d308cc64"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:05b3d479425e047c848b9782cd7aac9c6727ce23181eb9647baf64ffdfc3da41"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:1e491e6489a6cb1d079df8eaa15957c277fdedb102b6a68cfbf40c4994412fd0"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:9e32cedc389bcb76d9f24ea8a012b3cb8385ee362ea437e1d012ffaed106c17d"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:46850a640df62ae940e34a163f72e26aca1f88e2da79148e1862faaac985c302"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-win32.whl", hash = "sha256:3d790f84201c3698d1bfb404c917f36e40531577a6dda02e45ba29b64d539867"}, + {file = "psycopg2_binary-2.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:1764546ffeaed4f9428707be61d68972eb5ede81239b46a45843e0071104d0dd"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-macosx_10_9_universal2.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:426c2ae999135d64e6a18849a7d1ad0e1bd007277e4a8f4752eaa40a96b550ff"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cf1d44e710ca3a9ce952bda2855830fe9f9017ed6259e01fcd71ea6287565f5"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024030b13bdcbd53d8a93891a2cf07719715724fc9fee40243f3bd78b4264b8f"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcda1c84a1c533c528356da5490d464a139b6e84eb77cc0b432e38c5c6dd7882"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:2ef892cabdccefe577088a79580301f09f2a713eb239f4f9f62b2b29cafb0577"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-manylinux_2_24_ppc64le.whl", hash = "sha256:af0516e1711995cb08dc19bbd05bec7dbdebf4185f68870595156718d237df3e"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e72c91bda9880f097c8aa3601a2c0de6c708763ba8128006151f496ca9065935"}, + {file = 
"psycopg2_binary-2.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e67b3c26e9b6d37b370c83aa790bbc121775c57bfb096c2e77eacca25fd0233b"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5fc447058d083b8c6ac076fc26b446d44f0145308465d745fba93a28c14c9e32"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d892bfa1d023c3781a3cab8dd5af76b626c483484d782e8bd047c180db590e4c"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-win32.whl", hash = "sha256:2abccab84d057723d2ca8f99ff7b619285d40da6814d50366f61f0fc385c3903"}, + {file = "psycopg2_binary-2.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:bef7e3f9dc6f0c13afdd671008534be5744e0e682fb851584c8c3a025ec09720"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:6e63814ec71db9bdb42905c925639f319c80e7909fb76c3b84edc79dadef8d60"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:212757ffcecb3e1a5338d4e6761bf9c04f750e7d027117e74aa3cd8a75bb6fbd"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f8a9bcab7b6db2e3dbf65b214dfc795b4c6b3bb3af922901b6a67f7cb47d5f8"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-manylinux_2_24_aarch64.whl", hash = "sha256:56b2957a145f816726b109ee3d4e6822c23f919a7d91af5a94593723ed667835"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-manylinux_2_24_ppc64le.whl", hash = "sha256:f95b8aca2703d6a30249f83f4fe6a9abf2e627aa892a5caaab2267d56be7ab69"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:70831e03bd53702c941da1a1ad36c17d825a24fbb26857b40913d58df82ec18b"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:dbc332beaf8492b5731229a881807cd7b91b50dbbbaf7fe2faf46942eda64a24"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:2d964eb24c8b021623df1c93c626671420c6efadbdb8655cb2bd5e0c6fa422ba"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:95076399ec3b27a8f7fa1cc9a83417b1c920d55cf7a97f718a94efbb96c7f503"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-win32.whl", hash = "sha256:3fc33295cfccad697a97a76dec3f1e94ad848b7b163c3228c1636977966b51e2"}, + {file = "psycopg2_binary-2.9.5-cp36-cp36m-win_amd64.whl", hash = "sha256:02551647542f2bf89073d129c73c05a25c372fc0a49aa50e0de65c3c143d8bd0"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-macosx_10_15_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:63e318dbe52709ed10d516a356f22a635e07a2e34c68145484ed96a19b0c4c68"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7e518a0911c50f60313cb9e74a169a65b5d293770db4770ebf004245f24b5c5"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9d38a4656e4e715d637abdf7296e98d6267df0cc0a8e9a016f8ba07e4aa3eeb"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:68d81a2fe184030aa0c5c11e518292e15d342a667184d91e30644c9d533e53e1"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash = "sha256:7ee3095d02d6f38bd7d9a5358fcc9ea78fcdb7176921528dd709cc63f40184f5"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:46512486be6fbceef51d7660dec017394ba3e170299d1dc30928cbedebbf103a"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b911dfb727e247340d36ae20c4b9259e4a64013ab9888ccb3cbba69b77fd9636"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:422e3d43b47ac20141bc84b3d342eead8d8099a62881a501e97d15f6addabfe9"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c5682a45df7d9642eff590abc73157c887a68f016df0a8ad722dcc0f888f56d7"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-win32.whl", hash = "sha256:b8104f709590fff72af801e916817560dbe1698028cd0afe5a52d75ceb1fce5f"}, + {file = "psycopg2_binary-2.9.5-cp37-cp37m-win_amd64.whl", hash = "sha256:7b3751857da3e224f5629400736a7b11e940b5da5f95fa631d86219a1beaafec"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-macosx_10_15_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:043a9fd45a03858ff72364b4b75090679bd875ee44df9c0613dc862ca6b98460"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9ffdc51001136b699f9563b1c74cc1f8c07f66ef7219beb6417a4c8aaa896c28"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c15ba5982c177bc4b23a7940c7e4394197e2d6a424a2d282e7c236b66da6d896"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc85b3777068ed30aff8242be2813038a929f2084f69e43ef869daddae50f6ee"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:215d6bf7e66732a514f47614f828d8c0aaac9a648c46a831955cb103473c7147"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-manylinux_2_24_ppc64le.whl", hash = "sha256:7d07f552d1e412f4b4e64ce386d4c777a41da3b33f7098b6219012ba534fb2c2"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a0adef094c49f242122bb145c3c8af442070dc0e4312db17e49058c1702606d4"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:00475004e5ed3e3bf5e056d66e5dcdf41a0dc62efcd57997acd9135c40a08a50"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7d88db096fa19d94f433420eaaf9f3c45382da2dd014b93e4bf3215639047c16"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:902844f9c4fb19b17dfa84d9e2ca053d4a4ba265723d62ea5c9c26b38e0aa1e6"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-win32.whl", hash = "sha256:4e7904d1920c0c89105c0517dc7e3f5c20fb4e56ba9cdef13048db76947f1d79"}, + {file = "psycopg2_binary-2.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:a36a0e791805aa136e9cbd0ffa040d09adec8610453ee8a753f23481a0057af5"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-macosx_10_15_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:25382c7d174c679ce6927c16b6fbb68b10e56ee44b1acb40671e02d29f2fce7c"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9c38d3869238e9d3409239bc05bc27d6b7c99c2a460ea337d2814b35fb4fea1b"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5c6527c8efa5226a9e787507652dd5ba97b62d29b53c371a85cd13f957fe4d42"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e59137cdb970249ae60be2a49774c6dfb015bd0403f05af1fe61862e9626642d"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-manylinux_2_24_aarch64.whl", hash = 
"sha256:d4c7b3a31502184e856df1f7bbb2c3735a05a8ce0ade34c5277e1577738a5c91"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-manylinux_2_24_ppc64le.whl", hash = "sha256:b9a794cef1d9c1772b94a72eec6da144c18e18041d294a9ab47669bc77a80c1d"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5254cbd4f4855e11cebf678c1a848a3042d455a22a4ce61349c36aafd4c2267"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c5e65c6ac0ae4bf5bef1667029f81010b6017795dcb817ba5c7b8a8d61fab76f"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:74eddec4537ab1f701a1647214734bc52cee2794df748f6ae5908e00771f180a"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:01ad49d68dd8c5362e4bfb4158f2896dc6e0c02e87b8a3770fc003459f1a4425"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-win32.whl", hash = "sha256:937880290775033a743f4836aa253087b85e62784b63fd099ee725d567a48aa1"}, + {file = "psycopg2_binary-2.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:484405b883630f3e74ed32041a87456c5e0e63a8e3429aa93e8714c366d62bd1"}, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +description = "Run a subprocess in a pseudo terminal" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] + +[[package]] +name = "pure-eval" +version = "0.2.2" +description = "Safely evaluate AST nodes without side effects" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, + {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, +] + +[package.extras] +tests = ["pytest"] + +[[package]] +name = "pyarrow" +version = "11.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "pyarrow-11.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:40bb42afa1053c35c749befbe72f6429b7b5f45710e85059cdd534553ebcf4f2"}, + {file = "pyarrow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7c28b5f248e08dea3b3e0c828b91945f431f4202f1a9fe84d1012a761324e1ba"}, + {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a37bc81f6c9435da3c9c1e767324ac3064ffbe110c4e460660c43e144be4ed85"}, + {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7c53def8dbbc810282ad308cc46a523ec81e653e60a91c609c2233ae407689"}, + {file = "pyarrow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:25aa11c443b934078bfd60ed63e4e2d42461682b5ac10f67275ea21e60e6042c"}, + {file = "pyarrow-11.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e217d001e6389b20a6759392a5ec49d670757af80101ee6b5f2c8ff0172e02ca"}, + {file = "pyarrow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad42bb24fc44c48f74f0d8c72a9af16ba9a01a2ccda5739a517aa860fa7e3d56"}, + {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d942c690ff24a08b07cb3df818f542a90e4d359381fbff71b8f2aea5bf58841"}, + {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f010ce497ca1b0f17a8243df3048055c0d18dcadbcc70895d5baf8921f753de5"}, + {file = "pyarrow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2f51dc7ca940fdf17893227edb46b6784d37522ce08d21afc56466898cb213b2"}, + {file = "pyarrow-11.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1cbcfcbb0e74b4d94f0b7dde447b835a01bc1d16510edb8bb7d6224b9bf5bafc"}, + {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaee8f79d2a120bf3e032d6d64ad20b3af6f56241b0ffc38d201aebfee879d00"}, + {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:410624da0708c37e6a27eba321a72f29d277091c8f8d23f72c92bada4092eb5e"}, + {file = "pyarrow-11.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2d53ba72917fdb71e3584ffc23ee4fcc487218f8ff29dd6df3a34c5c48fe8c06"}, + {file = "pyarrow-11.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f12932e5a6feb5c58192209af1d2607d488cb1d404fbc038ac12ada60327fa34"}, + {file = "pyarrow-11.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:41a1451dd895c0b2964b83d91019e46f15b5564c7ecd5dcb812dadd3f05acc97"}, + {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:becc2344be80e5dce4e1b80b7c650d2fc2061b9eb339045035a1baa34d5b8f1c"}, + {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f40be0d7381112a398b93c45a7e69f60261e7b0269cc324e9f739ce272f4f70"}, + {file = "pyarrow-11.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:362a7c881b32dc6b0eccf83411a97acba2774c10edcec715ccaab5ebf3bb0835"}, + {file = "pyarrow-11.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:ccbf29a0dadfcdd97632b4f7cca20a966bb552853ba254e874c66934931b9841"}, + {file = "pyarrow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e99be85973592051e46412accea31828da324531a060bd4585046a74ba45854"}, + {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69309be84dcc36422574d19c7d3a30a7ea43804f12552356d1ab2a82a713c418"}, + {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da93340fbf6f4e2a62815064383605b7ffa3e9eeb320ec839995b1660d69f89b"}, + {file = "pyarrow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:caad867121f182d0d3e1a0d36f197df604655d0b466f1bc9bafa903aa95083e4"}, + {file = "pyarrow-11.0.0.tar.gz", hash = "sha256:5461c57dbdb211a632a48facb9b39bbeb8a7905ec95d768078525283caef5f6d"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.2.8" +description = "A collection of ASN.1-based protocols modules." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, + {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + +[[package]] +name = "pycares" +version = "4.3.0" +description = "Python interface for c-ares" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "pycares-4.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:19c9cdd3322d422931982939773e453e491dfc5c0b2e23d7266959315c7a0824"}, + {file = "pycares-4.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e56e9cdf46a092970dc4b75bbabddea9f480be5eeadc3fcae3eb5c6807c4136"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c75a6241c79b935048272cb77df498da64b8defc8c4b29fdf9870e43ba4cbb4"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24d8654fac3742791b8bef59d1fbb3e19ae6a5c48876a6d98659f7c66ee546c4"}, + {file = "pycares-4.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebf50b049a245880f1aa16a6f72c4408e0a65b49ea1d3bf13383a44a2cabd2bf"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:84daf560962763c0359fd79c750ef480f0fda40c08b57765088dbe362e8dc452"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:978d10da7ee74b9979c494afa8b646411119ad0186a29c7f13c72bb4295630c6"}, + {file = "pycares-4.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c5b9d7fe52eb3d243f5ead58d5c0011884226d961df8360a34618c38c7515"}, + {file = "pycares-4.3.0-cp310-cp310-win32.whl", hash = "sha256:da7c7089ae617317d2cbe38baefd3821387b3bfef7b3ee5b797b871cb1257974"}, + {file = "pycares-4.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7106dc683db30e1d851283b7b9df7a5ea4964d6bdd000d918d91d4b1f9bed329"}, + {file = "pycares-4.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4e7a24ecef0b1933f2a3fdbf328d1b529a76cda113f8364fa0742e5b3bd76566"}, + {file = "pycares-4.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7abccc2aa4771c06994e4d9ed596453061e2b8846f887d9c98a64ccdaf4790a"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:531fed46c5ed798a914c3207be4ae7b297c4d09e4183d3cf8fd9ee59a55d5080"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c9335175af0c64a1e0ba67bdd349eb62d4eea0ad02c235ccdf0d535fd20f323"}, + {file = "pycares-4.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5f0e95535027d2dcd51e780410632b0d3ed7e9e5ceb25dc0fe937f2c2960079"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3692179ce5fb96908ba342e1e5303608d0c976f0d5d4619fa9d3d6d9d5a9a1b4"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c4cb6cc7fe8e0606d30b60367f59fe26d1472e88555d61e202db70dea5c8edb"}, + {file = "pycares-4.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3215445396c74103e2054e6b349d9e85883ceda2006d0039fc2d58c9b11818a2"}, + {file = "pycares-4.3.0-cp311-cp311-win32.whl", hash = "sha256:6a0c0c3a0adf490bba9dbb37dbd07ec81e4a6584f095036ac34f06a633710ffe"}, + {file = 
"pycares-4.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:995cb37cc39bd40ca87bb16555a0f7724f3be30d9f9059a4caab2fde45b1b903"}, + {file = "pycares-4.3.0-cp36-cp36m-win32.whl", hash = "sha256:4c9187be72449c975c11daa1d94d7ddcc494f8a4c37a6c18f977cd7024a531d9"}, + {file = "pycares-4.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d7405ba10a2903a58b8b0faedcb54994c9ee002ad01963587fabf93e7e479783"}, + {file = "pycares-4.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:40aaa12081495f879f11f4cfc95edfec1ea14711188563102f9e33fe98728fac"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4972cac24b66c5997f3a3e2cb608e408066d80103d443e36d626a88a287b9ae7"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35886dba7aa5b73affca8729aeb5a1f5e94d3d9a764adb1b7e75bafca44eeca5"}, + {file = "pycares-4.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5cea6e1f3be016f155d60f27f16c1074d58b4d6e123228fdbc3326d076016af8"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3a9fd2665b053afb39226ac6f8137a60910ca7729358456df2fb94866f4297de"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e8e9195f869120e44e0aa0a6098bb5c19947f4753054365891f592e6f9eab3ef"}, + {file = "pycares-4.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:674486ecf2afb25ee219171b07cdaba481a1aaa2dabb155779c7be9ded03eaa9"}, + {file = "pycares-4.3.0-cp37-cp37m-win32.whl", hash = "sha256:1b6cd3161851499b6894d1e23bfd633e7b775472f5af35ae35409c4a47a2d45e"}, + {file = "pycares-4.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:710120c97b9afdba443564350c3f5f72fd9aae74d95b73dc062ca8ac3d7f36d7"}, + {file = "pycares-4.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9103649bd29d84bc6bcfaf09def9c0592bbc766018fad19d76d09989608b915d"}, + {file = "pycares-4.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c072dbaf73cb5434279578dc35322867d8d5df053e14fdcdcc589994ba4804ae"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008531733f9c7a976b59c7760a3672b191159fd69ae76c01ca051f20b5e44164"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2aae02d97d77dcff840ab55f86cb8b99bf644acbca17e1edb7048408b9782088"}, + {file = "pycares-4.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:257953ae6d400a934fd9193aeb20990ac84a78648bdf5978e998bd007a4045cd"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c28d481efae26936ec08cb6beea305f4b145503b152cf2c4dc68cc4ad9644f0e"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:976249b39037dbfb709ccf7e1c40d2785905a0065536385d501b94570cfed96d"}, + {file = "pycares-4.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:98568c30cfab6b327d94ae1acdf85bbba4cffd415980804985d34ca07e6f4791"}, + {file = "pycares-4.3.0-cp38-cp38-win32.whl", hash = "sha256:a2f3c4f49f43162f7e684419d9834c2c8ec165e54cb8dc47aa9dc0c2132701c0"}, + {file = "pycares-4.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:1730ef93e33e4682fbbf0e7fb19df2ed9822779d17de8ea6e20d5b0d71c1d2be"}, + {file = "pycares-4.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a26b3f1684557025da26ce65d076619890c82b95e38cc7284ce51c3539a1ce8"}, + {file = "pycares-4.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:86112cce01655b9f63c5e53b74722084e88e784a7a8ad138d373440337c591c9"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01465a191dc78e923884bb45cd63c7e012623e520cf7ed67e542413ee334804"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9fd5d6012f3ee8c8038cbfe16e988bbd17b2f21eea86650874bf63757ee6161"}, + {file = "pycares-4.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa36b8ea91eae20b5c7205f3e6654423f066af24a1df02b274770a96cbcafaa7"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:61019151130557c1788cae52e4f2f388a7520c9d92574f3a0d61c974c6740db0"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:231962bb46274c52632469a1e686fab065dbd106dbef586de4f7fb101e297587"}, + {file = "pycares-4.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6c979512fa51c7ccef5204fe10ed4e5c44c2bce5f335fe98a3e423f1672bd7d4"}, + {file = "pycares-4.3.0-cp39-cp39-win32.whl", hash = "sha256:655cf0df862ce3847a60e1a106dafa2ba2c14e6636bac49e874347acdc7312dc"}, + {file = "pycares-4.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:36f2251ad0f99a5ce13df45c94c3161d9734c9e9fa2b9b4cc163b853ca170dc5"}, + {file = "pycares-4.3.0.tar.gz", hash = "sha256:c542696f6dac978e9d99192384745a65f80a7d9450501151e4a7563e06010d45"}, +] + +[package.dependencies] +cffi = ">=1.5.0" + +[package.extras] +idna = ["idna (>=2.1)"] + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pydantic" +version = "1.10.7" +description = "Data validation and settings management using python type hints" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-1.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e79e999e539872e903767c417c897e729e015872040e56b96e67968c3b918b2d"}, + {file = "pydantic-1.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:01aea3a42c13f2602b7ecbbea484a98169fb568ebd9e247593ea05f01b884b2e"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:516f1ed9bc2406a0467dd777afc636c7091d71f214d5e413d64fef45174cfc7a"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae150a63564929c675d7f2303008d88426a0add46efd76c3fc797cd71cb1b46f"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"}, + {file = "pydantic-1.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:a7cd2251439988b413cb0a985c4ed82b6c6aac382dbaff53ae03c4b23a70e80a"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:68792151e174a4aa9e9fc1b4e653e65a354a2fa0fed169f7b3d09902ad2cb6f1"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:dfe2507b8ef209da71b6fb5f4e597b50c5a34b78d7e857c4f8f3115effaef5fe"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10a86d8c8db68086f1e30a530f7d5f83eb0685e632e411dbbcf2d5c0150e8dcd"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75ae19d2a3dbb146b6f324031c24f8a3f52ff5d6a9f22f0683694b3afcb16fb"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:464855a7ff7f2cc2cf537ecc421291b9132aa9c79aef44e917ad711b4a93163b"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:193924c563fae6ddcb71d3f06fa153866423ac1b793a47936656e806b64e24ca"}, + {file = "pydantic-1.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:b4a849d10f211389502059c33332e91327bc154acc1845f375a99eca3afa802d"}, + {file = "pydantic-1.10.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cc1dde4e50a5fc1336ee0581c1612215bc64ed6d28d2c7c6f25d2fe3e7c3e918"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0cfe895a504c060e5d36b287ee696e2fdad02d89e0d895f83037245218a87fe"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:670bb4683ad1e48b0ecb06f0cfe2178dcf74ff27921cdf1606e527d2617a81ee"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:950ce33857841f9a337ce07ddf46bc84e1c4946d2a3bba18f8280297157a3fd1"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c15582f9055fbc1bfe50266a19771bbbef33dd28c45e78afbe1996fd70966c2a"}, + {file = "pydantic-1.10.7-cp37-cp37m-win_amd64.whl", hash = "sha256:82dffb306dd20bd5268fd6379bc4bfe75242a9c2b79fec58e1041fbbdb1f7914"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c7f51861d73e8b9ddcb9916ae7ac39fb52761d9ea0df41128e81e2ba42886cd"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6434b49c0b03a51021ade5c4daa7d70c98f7a79e95b551201fff682fc1661245"}, + {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64d34ab766fa056df49013bb6e79921a0265204c071984e75a09cbceacbbdd5d"}, + {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:701daea9ffe9d26f97b52f1d157e0d4121644f0fcf80b443248434958fd03dc3"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf135c46099ff3f919d2150a948ce94b9ce545598ef2c6c7bf55dca98a304b52"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0f85904f73161817b80781cc150f8b906d521fa11e3cdabae19a581c3606209"}, + {file = "pydantic-1.10.7-cp38-cp38-win_amd64.whl", hash = "sha256:9f6f0fd68d73257ad6685419478c5aece46432f4bdd8d32c7345f1986496171e"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c230c0d8a322276d6e7b88c3f7ce885f9ed16e0910354510e0bae84d54991143"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:976cae77ba6a49d80f461fd8bba183ff7ba79f44aa5cfa82f1346b5626542f8e"}, + {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d45fc99d64af9aaf7e308054a0067fdcd87ffe974f2442312372dfa66e1001d"}, + {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d2a5ebb48958754d386195fe9e9c5106f11275867051bf017a8059410e9abf1f"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:abfb7d4a7cd5cc4e1d1887c43503a7c5dd608eadf8bc615413fc498d3e4645cd"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:80b1fab4deb08a8292d15e43a6edccdffa5377a36a4597bb545b93e79c5ff0a5"}, + {file = "pydantic-1.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:d71e69699498b020ea198468e2480a2f1e7433e32a3a99760058c6520e2bea7e"}, + {file = "pydantic-1.10.7-py3-none-any.whl", hash = "sha256:0cd181f1d0b1d00e2b705f1bf1ac7799a2d938cce3376b8007df62b29be3c2c6"}, + {file = "pydantic-1.10.7.tar.gz", hash = "sha256:cfc83c0678b6ba51b0532bea66860617c4cd4251ecf76e9846fa5a9f3454e97e"}, +] + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pydata-sphinx-theme" +version = "0.8.1" +description = "Bootstrap-based Sphinx theme from the PyData community" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydata_sphinx_theme-0.8.1-py3-none-any.whl", hash = "sha256:af2c99cb0b43d95247b1563860942ba75d7f1596360594fce510caaf8c4fcc16"}, + {file = "pydata_sphinx_theme-0.8.1.tar.gz", hash = "sha256:96165702253917ece13dd895e23b96ee6dce422dcc144d560806067852fe1fed"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +docutils = "!=0.17.0" +packaging = "*" +sphinx = ">=3.5.4,<5" + +[package.extras] +coverage = ["codecov", "pydata-sphinx-theme[test]", "pytest-cov"] +dev = ["nox", "pre-commit", "pydata-sphinx-theme[coverage]", "pyyaml"] +doc = ["jupyter_sphinx", "myst-parser", "numpy", "numpydoc", "pandas", "plotly", "pytest", "pytest-regressions", "sphinx-sitemap", "sphinxext-rediraffe", "xarray"] +test = ["pydata-sphinx-theme[doc]", "pytest"] + +[[package]] +name = "pyee" +version = "9.0.4" +description = "A port of node.js's EventEmitter to python." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pyee-9.0.4-py2.py3-none-any.whl", hash = "sha256:9f066570130c554e9cc12de5a9d86f57c7ee47fece163bbdaa3e9c933cfbdfa5"}, + {file = "pyee-9.0.4.tar.gz", hash = "sha256:2770c4928abc721f46b705e6a72b0c59480c4a69c9a83ca0b00bb994f1ea4b32"}, +] + +[package.dependencies] +typing-extensions = "*" + +[[package]] +name = "pygments" +version = "2.14.0" +description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"}, + {file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + +[[package]] +name = "pyjwt" +version = "2.6.0" +description = "JSON Web Token implementation in Python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.6.0-py3-none-any.whl", hash = "sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14"}, + {file = "PyJWT-2.6.0.tar.gz", hash = "sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd"}, +] + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "main" +optional = true +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pypdf" +version = "3.7.0" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "pypdf-3.7.0-py3-none-any.whl", hash = "sha256:b50c2d3c807af2f75c945b7bdd8f8bb01d513a0c25d6b66bf299b9fad1cbc91c"}, + {file = "pypdf-3.7.0.tar.gz", hash = "sha256:da98eb41428b26f5ab23561cc125eedff450147598d6b6159e62943edc0008fe"}, +] + +[package.dependencies] +typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + +[[package]] +name = "pyrsistent" +version = "0.19.3" +description = "Persistent/Functional/Immutable data structures" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyrsistent-0.19.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a"}, + {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64"}, + {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf"}, + {file = "pyrsistent-0.19.3-cp310-cp310-win32.whl", hash = "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a"}, + {file = "pyrsistent-0.19.3-cp310-cp310-win_amd64.whl", hash = "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da"}, + {file = 
"pyrsistent-0.19.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9"}, + {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393"}, + {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19"}, + {file = "pyrsistent-0.19.3-cp311-cp311-win32.whl", hash = "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3"}, + {file = "pyrsistent-0.19.3-cp311-cp311-win_amd64.whl", hash = "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8"}, + {file = "pyrsistent-0.19.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28"}, + {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf"}, + {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9"}, + {file = "pyrsistent-0.19.3-cp37-cp37m-win32.whl", hash = "sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1"}, + {file = "pyrsistent-0.19.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b"}, + {file = "pyrsistent-0.19.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8"}, + {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a"}, + {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c"}, + {file = "pyrsistent-0.19.3-cp38-cp38-win32.whl", hash = "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c"}, + {file = "pyrsistent-0.19.3-cp38-cp38-win_amd64.whl", hash = "sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7"}, + {file = "pyrsistent-0.19.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc"}, + {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2"}, + {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3"}, + {file = "pyrsistent-0.19.3-cp39-cp39-win32.whl", hash = "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2"}, + {file = "pyrsistent-0.19.3-cp39-cp39-win_amd64.whl", hash = "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98"}, + {file = "pyrsistent-0.19.3-py3-none-any.whl", hash = "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64"}, + {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"}, +] + +[[package]] +name = "pytest" +version = "7.2.2" +description = "pytest: simple powerful testing with 
Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, + {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, +] + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.20.3" +description = "Pytest support for asyncio" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.20.3.tar.gz", hash = "sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36"}, + {file = "pytest_asyncio-0.20.3-py3-none-any.whl", hash = "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442"}, +] + +[package.dependencies] +pytest = ">=6.1.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-cov" +version = "4.0.0" +description = "Pytest plugin for measuring coverage." +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-cov-4.0.0.tar.gz", hash = "sha256:996b79efde6433cdbd0088872dbc5fb3ed7fe1578b68cdbba634f14bb8dd0470"}, + {file = "pytest_cov-4.0.0-py3-none-any.whl", hash = "sha256:2feb1b751d66a8bd934e5edfa2e961d11309dc37b73b0eabe73b5945fee20f6b"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "pytest-dotenv" +version = "0.5.2" +description = "A py.test plugin that parses environment files before running tests" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pytest-dotenv-0.5.2.tar.gz", hash = "sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732"}, + {file = "pytest_dotenv-0.5.2-py3-none-any.whl", hash = "sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f"}, +] + +[package.dependencies] +pytest = ">=5.0.0" +python-dotenv = ">=0.9.1" + +[[package]] +name = "pytest-watcher" +version = "0.2.6" +description = "Continiously runs pytest on changes in *.py files" +category = "dev" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pytest-watcher-0.2.6.tar.gz", hash = "sha256:351dfb3477366030ff275bfbfc9f29bee35cd07f16a3355b38bf92766886bae4"}, + {file = "pytest_watcher-0.2.6-py3-none-any.whl", hash = "sha256:0a507159d051c9461790363e0f9b2827c1d82ad2ae8966319598695e485b1dd5"}, +] + +[package.dependencies] +watchdog = ">=2.0.0" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = 
"python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, + {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-json-logger" +version = "2.0.7" +description = "A python library adding a json log formatter" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, + {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, +] + +[[package]] +name = "pytz" +version = "2023.2" +description = "World timezone definitions, modern and historical" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.2-py2.py3-none-any.whl", hash = "sha256:8a8baaf1e237175b02f5c751eea67168043a749c843989e2b3015aa1ad9db68b"}, + {file = "pytz-2023.2.tar.gz", hash = "sha256:a27dcf612c05d2ebde626f7d506555f10dfc815b3eddccfaadfc7d99b11c9a07"}, +] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = 
"sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pywinpty" +version = "2.0.10" +description = "Pseudo terminal support for Windows from Python." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pywinpty-2.0.10-cp310-none-win_amd64.whl", hash = "sha256:4c7d06ad10f6e92bc850a467f26d98f4f30e73d2fe5926536308c6ae0566bc16"}, + {file = "pywinpty-2.0.10-cp311-none-win_amd64.whl", hash = "sha256:7ffbd66310b83e42028fc9df7746118978d94fba8c1ebf15a7c1275fdd80b28a"}, + {file = "pywinpty-2.0.10-cp37-none-win_amd64.whl", hash = "sha256:38cb924f2778b5751ef91a75febd114776b3af0ae411bc667be45dd84fc881d3"}, + {file = "pywinpty-2.0.10-cp38-none-win_amd64.whl", hash = "sha256:902d79444b29ad1833b8d5c3c9aabdfd428f4f068504430df18074007c8c0de8"}, + {file = "pywinpty-2.0.10-cp39-none-win_amd64.whl", hash = "sha256:3c46aef80dd50979aff93de199e4a00a8ee033ba7a03cadf0a91fed45f0c39d7"}, + {file = "pywinpty-2.0.10.tar.gz", hash = "sha256:cdbb5694cf8c7242c2ecfaca35c545d31fa5d5814c3d67a4e628f803f680ebea"}, +] + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = 
"PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = 
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] + +[[package]] +name = "pyzmq" +version = "25.0.2" +description = "Python bindings for 0MQ" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyzmq-25.0.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ac178e666c097c8d3deb5097b58cd1316092fc43e8ef5b5fdb259b51da7e7315"}, + {file = "pyzmq-25.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:659e62e1cbb063151c52f5b01a38e1df6b54feccfa3e2509d44c35ca6d7962ee"}, + {file = "pyzmq-25.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8280ada89010735a12b968ec3ea9a468ac2e04fddcc1cede59cb7f5178783b9c"}, + {file = "pyzmq-25.0.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b5eeb5278a8a636bb0abdd9ff5076bcbb836cd2302565df53ff1fa7d106d54"}, + {file = "pyzmq-25.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a2e5fe42dfe6b73ca120b97ac9f34bfa8414feb15e00e37415dbd51cf227ef6"}, + {file = "pyzmq-25.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:827bf60e749e78acb408a6c5af6688efbc9993e44ecc792b036ec2f4b4acf485"}, + {file = "pyzmq-25.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7b504ae43d37e282301da586529e2ded8b36d4ee2cd5e6db4386724ddeaa6bbc"}, + {file = "pyzmq-25.0.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cb1f69a0a2a2b1aae8412979dd6293cc6bcddd4439bf07e4758d864ddb112354"}, + {file = "pyzmq-25.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b9c9cc965cdf28381e36da525dcb89fc1571d9c54800fdcd73e3f73a2fc29bd"}, + {file = "pyzmq-25.0.2-cp310-cp310-win32.whl", hash = "sha256:24abbfdbb75ac5039205e72d6c75f10fc39d925f2df8ff21ebc74179488ebfca"}, + {file = "pyzmq-25.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6a821a506822fac55d2df2085a52530f68ab15ceed12d63539adc32bd4410f6e"}, + {file = "pyzmq-25.0.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:9af0bb0277e92f41af35e991c242c9c71920169d6aa53ade7e444f338f4c8128"}, + {file = "pyzmq-25.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:54a96cf77684a3a537b76acfa7237b1e79a8f8d14e7f00e0171a94b346c5293e"}, + {file = "pyzmq-25.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88649b19ede1cab03b96b66c364cbbf17c953615cdbc844f7f6e5f14c5e5261c"}, + {file = "pyzmq-25.0.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:715cff7644a80a7795953c11b067a75f16eb9fc695a5a53316891ebee7f3c9d5"}, + {file = "pyzmq-25.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:312b3f0f066b4f1d17383aae509bacf833ccaf591184a1f3c7a1661c085063ae"}, + {file = "pyzmq-25.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d488c5c8630f7e782e800869f82744c3aca4aca62c63232e5d8c490d3d66956a"}, + {file = "pyzmq-25.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:38d9f78d69bcdeec0c11e0feb3bc70f36f9b8c44fc06e5d06d91dc0a21b453c7"}, + {file = "pyzmq-25.0.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3059a6a534c910e1d5d068df42f60d434f79e6cc6285aa469b384fa921f78cf8"}, + {file = "pyzmq-25.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6526d097b75192f228c09d48420854d53dfbc7abbb41b0e26f363ccb26fbc177"}, + {file = 
"pyzmq-25.0.2-cp311-cp311-win32.whl", hash = "sha256:5c5fbb229e40a89a2fe73d0c1181916f31e30f253cb2d6d91bea7927c2e18413"}, + {file = "pyzmq-25.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:ed15e3a2c3c2398e6ae5ce86d6a31b452dfd6ad4cd5d312596b30929c4b6e182"}, + {file = "pyzmq-25.0.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:032f5c8483c85bf9c9ca0593a11c7c749d734ce68d435e38c3f72e759b98b3c9"}, + {file = "pyzmq-25.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:374b55516393bfd4d7a7daa6c3b36d6dd6a31ff9d2adad0838cd6a203125e714"}, + {file = "pyzmq-25.0.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:08bfcc21b5997a9be4fefa405341320d8e7f19b4d684fb9c0580255c5bd6d695"}, + {file = "pyzmq-25.0.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1a843d26a8da1b752c74bc019c7b20e6791ee813cd6877449e6a1415589d22ff"}, + {file = "pyzmq-25.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:b48616a09d7df9dbae2f45a0256eee7b794b903ddc6d8657a9948669b345f220"}, + {file = "pyzmq-25.0.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:d4427b4a136e3b7f85516c76dd2e0756c22eec4026afb76ca1397152b0ca8145"}, + {file = "pyzmq-25.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:26b0358e8933990502f4513c991c9935b6c06af01787a36d133b7c39b1df37fa"}, + {file = "pyzmq-25.0.2-cp36-cp36m-win32.whl", hash = "sha256:c8fedc3ccd62c6b77dfe6f43802057a803a411ee96f14e946f4a76ec4ed0e117"}, + {file = "pyzmq-25.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:2da6813b7995b6b1d1307329c73d3e3be2fd2d78e19acfc4eff2e27262732388"}, + {file = "pyzmq-25.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a35960c8b2f63e4ef67fd6731851030df68e4b617a6715dd11b4b10312d19fef"}, + {file = "pyzmq-25.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eef2a0b880ab40aca5a878933376cb6c1ec483fba72f7f34e015c0f675c90b20"}, + {file = "pyzmq-25.0.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:85762712b74c7bd18e340c3639d1bf2f23735a998d63f46bb6584d904b5e401d"}, + {file = "pyzmq-25.0.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:64812f29d6eee565e129ca14b0c785744bfff679a4727137484101b34602d1a7"}, + {file = "pyzmq-25.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:510d8e55b3a7cd13f8d3e9121edf0a8730b87d925d25298bace29a7e7bc82810"}, + {file = "pyzmq-25.0.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b164cc3c8acb3d102e311f2eb6f3c305865ecb377e56adc015cb51f721f1dda6"}, + {file = "pyzmq-25.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:28fdb9224a258134784a9cf009b59265a9dde79582fb750d4e88a6bcbc6fa3dc"}, + {file = "pyzmq-25.0.2-cp37-cp37m-win32.whl", hash = "sha256:dd771a440effa1c36d3523bc6ba4e54ff5d2e54b4adcc1e060d8f3ca3721d228"}, + {file = "pyzmq-25.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:9bdc40efb679b9dcc39c06d25629e55581e4c4f7870a5e88db4f1c51ce25e20d"}, + {file = "pyzmq-25.0.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:1f82906a2d8e4ee310f30487b165e7cc8ed09c009e4502da67178b03083c4ce0"}, + {file = "pyzmq-25.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:21ec0bf4831988af43c8d66ba3ccd81af2c5e793e1bf6790eb2d50e27b3c570a"}, + {file = "pyzmq-25.0.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbce982a17c88d2312ec2cf7673985d444f1beaac6e8189424e0a0e0448dbb3"}, + {file = "pyzmq-25.0.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9e1d2f2d86fc75ed7f8845a992c5f6f1ab5db99747fb0d78b5e4046d041164d2"}, + {file = 
"pyzmq-25.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2e92ff20ad5d13266bc999a29ed29a3b5b101c21fdf4b2cf420c09db9fb690e"}, + {file = "pyzmq-25.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edbbf06cc2719889470a8d2bf5072bb00f423e12de0eb9ffec946c2c9748e149"}, + {file = "pyzmq-25.0.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:77942243ff4d14d90c11b2afd8ee6c039b45a0be4e53fb6fa7f5e4fd0b59da39"}, + {file = "pyzmq-25.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ab046e9cb902d1f62c9cc0eca055b1d11108bdc271caf7c2171487298f229b56"}, + {file = "pyzmq-25.0.2-cp38-cp38-win32.whl", hash = "sha256:ad761cfbe477236802a7ab2c080d268c95e784fe30cafa7e055aacd1ca877eb0"}, + {file = "pyzmq-25.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:8560756318ec7c4c49d2c341012167e704b5a46d9034905853c3d1ade4f55bee"}, + {file = "pyzmq-25.0.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:ab2c056ac503f25a63f6c8c6771373e2a711b98b304614151dfb552d3d6c81f6"}, + {file = "pyzmq-25.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cca8524b61c0eaaa3505382dc9b9a3bc8165f1d6c010fdd1452c224225a26689"}, + {file = "pyzmq-25.0.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cfb9f7eae02d3ac42fbedad30006b7407c984a0eb4189a1322241a20944d61e5"}, + {file = "pyzmq-25.0.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5eaeae038c68748082137d6896d5c4db7927e9349237ded08ee1bbd94f7361c9"}, + {file = "pyzmq-25.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a31992a8f8d51663ebf79df0df6a04ffb905063083d682d4380ab8d2c67257c"}, + {file = "pyzmq-25.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6a979e59d2184a0c8f2ede4b0810cbdd86b64d99d9cc8a023929e40dce7c86cc"}, + {file = "pyzmq-25.0.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1f124cb73f1aa6654d31b183810febc8505fd0c597afa127c4f40076be4574e0"}, + {file = "pyzmq-25.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:65c19a63b4a83ae45d62178b70223adeee5f12f3032726b897431b6553aa25af"}, + {file = "pyzmq-25.0.2-cp39-cp39-win32.whl", hash = "sha256:83d822e8687621bed87404afc1c03d83fa2ce39733d54c2fd52d8829edb8a7ff"}, + {file = "pyzmq-25.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:24683285cc6b7bf18ad37d75b9db0e0fefe58404e7001f1d82bf9e721806daa7"}, + {file = "pyzmq-25.0.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4a4b4261eb8f9ed71f63b9eb0198dd7c934aa3b3972dac586d0ef502ba9ab08b"}, + {file = "pyzmq-25.0.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:62ec8d979f56c0053a92b2b6a10ff54b9ec8a4f187db2b6ec31ee3dd6d3ca6e2"}, + {file = "pyzmq-25.0.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:affec1470351178e892121b3414c8ef7803269f207bf9bef85f9a6dd11cde264"}, + {file = "pyzmq-25.0.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffc71111433bd6ec8607a37b9211f4ef42e3d3b271c6d76c813669834764b248"}, + {file = "pyzmq-25.0.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:6fadc60970714d86eff27821f8fb01f8328dd36bebd496b0564a500fe4a9e354"}, + {file = "pyzmq-25.0.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:269968f2a76c0513490aeb3ba0dc3c77b7c7a11daa894f9d1da88d4a0db09835"}, + {file = "pyzmq-25.0.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f7c8b8368e84381ae7c57f1f5283b029c888504aaf4949c32e6e6fb256ec9bf0"}, + {file = "pyzmq-25.0.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:25e6873a70ad5aa31e4a7c41e5e8c709296edef4a92313e1cd5fc87bbd1874e2"}, + {file = "pyzmq-25.0.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b733076ff46e7db5504c5e7284f04a9852c63214c74688bdb6135808531755a3"}, + {file = "pyzmq-25.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:a6f6ae12478fdc26a6d5fdb21f806b08fa5403cd02fd312e4cb5f72df078f96f"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:67da1c213fbd208906ab3470cfff1ee0048838365135a9bddc7b40b11e6d6c89"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:531e36d9fcd66f18de27434a25b51d137eb546931033f392e85674c7a7cea853"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34a6fddd159ff38aa9497b2e342a559f142ab365576284bc8f77cb3ead1f79c5"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b491998ef886662c1f3d49ea2198055a9a536ddf7430b051b21054f2a5831800"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5d496815074e3e3d183fe2c7fcea2109ad67b74084c254481f87b64e04e9a471"}, + {file = "pyzmq-25.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:56a94ab1d12af982b55ca96c6853db6ac85505e820d9458ac76364c1998972f4"}, + {file = "pyzmq-25.0.2.tar.gz", hash = "sha256:6b8c1bbb70e868dc88801aa532cae6bd4e3b5233784692b786f17ad2962e5149"}, +] + +[package.dependencies] +cffi = {version = "*", markers = "implementation_name == \"pypy\""} + +[[package]] +name = "qdrant-client" +version = "1.1.0" +description = "Client library for the Qdrant vector search engine" +category = "main" +optional = true +python-versions = ">=3.7,<3.12" +files = [ + {file = "qdrant_client-1.1.0-py3-none-any.whl", hash = "sha256:60aa8f76a78b07980b5d8a602632c576a3ed8f446f900ab47446886e3b35a1af"}, + {file = "qdrant_client-1.1.0.tar.gz", hash = "sha256:b6258f4178d891433beeb80a61b406e23762f5cfc8d964ccab9cbef732dac7fd"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.14.0", extras = ["http2"]} +numpy = {version = ">=1.21", markers = "python_version >= \"3.8\""} +pydantic = ">=1.8,<2.0" +typing-extensions = ">=4.0.0,<5.0.0" +urllib3 = ">=1.26.14,<2.0.0" + +[[package]] +name = "qtconsole" +version = "5.4.1" +description = "Jupyter Qt console" +category = "dev" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "qtconsole-5.4.1-py3-none-any.whl", hash = "sha256:bae8c7e10170cdcdcaf7e6d53ad7d6a7412249b9b8310a0eaa6b6f3b260f32db"}, + {file = "qtconsole-5.4.1.tar.gz", hash = "sha256:f67a03f40f722e13261791280f73068dbaf9dafcc335cbba644ccc8f892640e5"}, +] + +[package.dependencies] +ipykernel = ">=4.1" +ipython-genutils = "*" +jupyter-client = ">=4.1" +jupyter-core = "*" +packaging = "*" +pygments = "*" +pyzmq = ">=17.1" +qtpy = ">=2.0.1" +traitlets = "<5.2.1 || >5.2.1,<5.2.2 || >5.2.2" + +[package.extras] +doc = ["Sphinx (>=1.3)"] +test = ["flaky", "pytest", "pytest-qt"] + +[[package]] +name = "qtpy" +version = "2.3.0" +description = "Provides an abstraction layer on top of the various Qt bindings (PyQt5/6 and PySide2/6)." 
+category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "QtPy-2.3.0-py3-none-any.whl", hash = "sha256:8d6d544fc20facd27360ea189592e6135c614785f0dec0b4f083289de6beb408"}, + {file = "QtPy-2.3.0.tar.gz", hash = "sha256:0603c9c83ccc035a4717a12908bf6bc6cb22509827ea2ec0e94c2da7c9ed57c5"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +test = ["pytest (>=6,!=7.0.0,!=7.0.1)", "pytest-cov (>=3.0.0)", "pytest-qt"] + +[[package]] +name = "redis" +version = "4.5.3" +description = "Python client for Redis database and key-value store" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "redis-4.5.3-py3-none-any.whl", hash = "sha256:7df17a0a2b72a4c8895b462dd07616c51b1dcb48fdd7ecb7b6f4bf39ecb2e94e"}, + {file = "redis-4.5.3.tar.gz", hash = "sha256:56732e156fe31801c4f43396bd3ca0c2a7f6f83d7936798531b9848d103381aa"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.2", markers = "python_version < \"3.11\""} + +[package.extras] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] + +[[package]] +name = "regex" +version = "2023.3.23" +description = "Alternative regular expression module, to replace re." +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "regex-2023.3.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:845a5e2d84389c4ddada1a9b95c055320070f18bb76512608374aca00d22eca8"}, + {file = "regex-2023.3.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87d9951f5a538dd1d016bdc0dcae59241d15fa94860964833a54d18197fcd134"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37ae17d3be44c0b3f782c28ae9edd8b47c1f1776d4cabe87edc0b98e1f12b021"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b8eb1e3bca6b48dc721818a60ae83b8264d4089a4a41d62be6d05316ec38e15"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df45fac182ebc3c494460c644e853515cc24f5ad9da05f8ffb91da891bfee879"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7006105b10b59971d3b248ad75acc3651c7e4cf54d81694df5a5130a3c3f7ea"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93f3f1aa608380fe294aa4cb82e2afda07a7598e828d0341e124b8fd9327c715"}, + {file = "regex-2023.3.23-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787954f541ab95d8195d97b0b8cf1dc304424adb1e07365967e656b92b38a699"}, + {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:20abe0bdf03630fe92ccafc45a599bca8b3501f48d1de4f7d121153350a2f77d"}, + {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11d00c31aeab9a6e0503bc77e73ed9f4527b3984279d997eb145d7c7be6268fd"}, + {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:d5bbe0e1511b844794a3be43d6c145001626ba9a6c1db8f84bdc724e91131d9d"}, + {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ea3c0cb56eadbf4ab2277e7a095676370b3e46dbfc74d5c383bd87b0d6317910"}, + {file = "regex-2023.3.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d895b4c863059a4934d3e874b90998df774644a41b349ebb330f85f11b4ef2c0"}, + {file = "regex-2023.3.23-cp310-cp310-win32.whl", hash = 
"sha256:9d764514d19b4edcc75fd8cb1423448ef393e8b6cbd94f38cab983ab1b75855d"}, + {file = "regex-2023.3.23-cp310-cp310-win_amd64.whl", hash = "sha256:11d1f2b7a0696dc0310de0efb51b1f4d813ad4401fe368e83c0c62f344429f98"}, + {file = "regex-2023.3.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a9c63cde0eaa345795c0fdeb19dc62d22e378c50b0bc67bf4667cd5b482d98b"}, + {file = "regex-2023.3.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dd7200b4c27b68cf9c9646da01647141c6db09f48cc5b51bc588deaf8e98a797"}, + {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22720024b90a6ba673a725dcc62e10fb1111b889305d7c6b887ac7466b74bedb"}, + {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b190a339090e6af25f4a5fd9e77591f6d911cc7b96ecbb2114890b061be0ac1"}, + {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e76b6fc0d8e9efa39100369a9b3379ce35e20f6c75365653cf58d282ad290f6f"}, + {file = "regex-2023.3.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7868b8f218bf69a2a15402fde08b08712213a1f4b85a156d90473a6fb6b12b09"}, + {file = "regex-2023.3.23-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2472428efc4127374f494e570e36b30bb5e6b37d9a754f7667f7073e43b0abdd"}, + {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c37df2a060cb476d94c047b18572ee2b37c31f831df126c0da3cd9227b39253d"}, + {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4479f9e2abc03362df4045b1332d4a2b7885b245a30d4f4b051c4083b97d95d8"}, + {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2396e0678167f2d0c197da942b0b3fb48fee2f0b5915a0feb84d11b6686afe6"}, + {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75f288c60232a5339e0ff2fa05779a5e9c74e9fc085c81e931d4a264501e745b"}, + {file = "regex-2023.3.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c869260aa62cee21c5eb171a466c0572b5e809213612ef8d495268cd2e34f20d"}, + {file = "regex-2023.3.23-cp311-cp311-win32.whl", hash = "sha256:25f0532fd0c53e96bad84664171969de9673b4131f2297f1db850d3918d58858"}, + {file = "regex-2023.3.23-cp311-cp311-win_amd64.whl", hash = "sha256:5ccfafd98473e007cebf7da10c1411035b7844f0f204015efd050601906dbb53"}, + {file = "regex-2023.3.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6572ff287176c0fb96568adb292674b421fa762153ed074d94b1d939ed92c253"}, + {file = "regex-2023.3.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a610e0adfcb0fc84ea25f6ea685e39e74cbcd9245a72a9a7aab85ff755a5ed27"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086afe222d58b88b62847bdbd92079b4699350b4acab892f88a935db5707c790"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79e29fd62fa2f597a6754b247356bda14b866131a22444d67f907d6d341e10f3"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c07ce8e9eee878a48ebeb32ee661b49504b85e164b05bebf25420705709fdd31"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b036f401895e854de9fefe061518e78d506d8a919cc250dc3416bca03f6f9a"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:78ac8dd8e18800bb1f97aad0d73f68916592dddf233b99d2b5cabc562088503a"}, + {file = "regex-2023.3.23-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:539dd010dc35af935b32f248099e38447bbffc10b59c2b542bceead2bed5c325"}, + {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9bf4a5626f2a0ea006bf81e8963f498a57a47d58907eaa58f4b3e13be68759d8"}, + {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf86b4328c204c3f315074a61bc1c06f8a75a8e102359f18ce99fbcbbf1951f0"}, + {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:2848bf76673c83314068241c8d5b7fa9ad9bed866c979875a0e84039349e8fa7"}, + {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c125a02d22c555e68f7433bac8449992fa1cead525399f14e47c2d98f2f0e467"}, + {file = "regex-2023.3.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cd1671e9d5ac05ce6aa86874dd8dfa048824d1dbe73060851b310c6c1a201a96"}, + {file = "regex-2023.3.23-cp38-cp38-win32.whl", hash = "sha256:fffe57312a358be6ec6baeb43d253c36e5790e436b7bf5b7a38df360363e88e9"}, + {file = "regex-2023.3.23-cp38-cp38-win_amd64.whl", hash = "sha256:dbb3f87e15d3dd76996d604af8678316ad2d7d20faa394e92d9394dfd621fd0c"}, + {file = "regex-2023.3.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c88e8c226473b5549fe9616980ea7ca09289246cfbdf469241edf4741a620004"}, + {file = "regex-2023.3.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6560776ec19c83f3645bbc5db64a7a5816c9d8fb7ed7201c5bcd269323d88072"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b1fc2632c01f42e06173d8dd9bb2e74ab9b0afa1d698058c867288d2c7a31f3"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fdf7ad455f1916b8ea5cdbc482d379f6daf93f3867b4232d14699867a5a13af7"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fc33b27b1d800fc5b78d7f7d0f287e35079ecabe68e83d46930cf45690e1c8c"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c49552dc938e3588f63f8a78c86f3c9c75301e813bca0bef13bdb4b87ccf364"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e152461e9a0aedec7d37fc66ec0fa635eca984777d3d3c3e36f53bf3d3ceb16e"}, + {file = "regex-2023.3.23-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:db034255e72d2995cf581b14bb3fc9c00bdbe6822b49fcd4eef79e1d5f232618"}, + {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:55ae114da21b7a790b90255ea52d2aa3a0d121a646deb2d3c6a3194e722fc762"}, + {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ef3f528fe1cc3d139508fe1b22523745aa77b9d6cb5b0bf277f48788ee0b993f"}, + {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:a81c9ec59ca2303acd1ccd7b9ac409f1e478e40e96f8f79b943be476c5fdb8bb"}, + {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cde09c4fdd070772aa2596d97e942eb775a478b32459e042e1be71b739d08b77"}, + {file = "regex-2023.3.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3cd9f5dd7b821f141d3a6ca0d5d9359b9221e4f051ca3139320adea9f1679691"}, + {file = "regex-2023.3.23-cp39-cp39-win32.whl", hash = "sha256:7304863f3a652dab5e68e6fb1725d05ebab36ec0390676d1736e0571ebb713ef"}, + {file = 
"regex-2023.3.23-cp39-cp39-win_amd64.whl", hash = "sha256:54c3fa855a3f7438149de3211738dd9b5f0c733f48b54ae05aa7fce83d48d858"}, + {file = "regex-2023.3.23.tar.gz", hash = "sha256:dc80df325b43ffea5cdea2e3eaa97a44f3dd298262b1c7fe9dbb2a9522b956a7"}, +] + +[[package]] +name = "requests" +version = "2.28.2" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" +files = [ + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." +category = "main" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "responses" +version = "0.22.0" +description = "A utility library for mocking out the `requests` Python library." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "responses-0.22.0-py3-none-any.whl", hash = "sha256:dcf294d204d14c436fddcc74caefdbc5764795a40ff4e6a7740ed8ddbf3294be"}, + {file = "responses-0.22.0.tar.gz", hash = "sha256:396acb2a13d25297789a5866b4881cf4e46ffd49cc26c43ab1117f40b973102e"}, +] + +[package.dependencies] +requests = ">=2.22.0,<3.0" +toml = "*" +types-toml = "*" +urllib3 = ">=1.25.10" + +[package.extras] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "types-requests"] + +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +description = "A pure python RFC3339 validator" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, + {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "rfc3986" +version = "1.5.0" +description = "Validating URI References per RFC 3986" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, + {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, +] + +[package.dependencies] +idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "rfc3986-validator" +version = "0.1.1" +description = "Pure python rfc3986 validator" +category = "dev" +optional = false +python-versions = 
">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9"}, + {file = "rfc3986_validator-0.1.1.tar.gz", hash = "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055"}, +] + +[[package]] +name = "rich" +version = "13.3.2" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "main" +optional = true +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.3.2-py3-none-any.whl", hash = "sha256:a104f37270bf677148d8acb07d33be1569eeee87e2d1beb286a4e9113caf6f2f"}, + {file = "rich-13.3.2.tar.gz", hash = "sha256:91954fe80cfb7985727a467ca98a7618e5dd15178cc2da10f553b36a93859001"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0,<3.0.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +category = "main" +optional = true +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "ruff" +version = "0.0.249" +description = "An extremely fast Python linter, written in Rust." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.0.249-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:03a26f1cb5605508de49d921d0970895b9e3ad4021f776a53be18fa95a4fc25b"}, + {file = "ruff-0.0.249-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:46537d960221e97adc6a3556159ab3ae4b722b9985de13c50b436732d4659af0"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2dcc1f3053092aeedef8e47704e301b74687fa480fe5e7ebef2b0eb2e4a0bd"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:855cfe47d146a1eb68347025c7b5ad651c083343de6cb7ccf90585bda3e381db"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf3af16748c8539a48451edbcb687994eccc6a764c95f42de22195007ae13a24"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:2815e05ba168dee6708dbbdab8d0c145bb3b0085c91ee552839c1c18a52f6cb1"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ab43389216cc8403db84992977e6f5e8fee83bd10aca05e1f2f262754cd8384"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b67c44ab260d3a838ec237c7234be1098bf2ef1421036fbbb229698513d1fc3"}, + {file = "ruff-0.0.249-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3d859744e1cc95ad5e52c4642509b3abb5ea0833f0529c380c2731b8cab5726"}, + {file = "ruff-0.0.249-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6f494276ee281eb09c7026cc17df1bfc2fe59ab39a87196014ce093ff27f1a0"}, + {file = "ruff-0.0.249-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:be23c57b9551d8fcf559755e5bc56ac5bcbc3215fc8a3190ea6ed1bb9133d8dd"}, + {file = "ruff-0.0.249-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:980a3bce8ba38c9b47bc000915e80a672add9f7e9c5b128375486ec8cd8f860d"}, + {file = "ruff-0.0.249-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f1e988e9365b11c6d7796c0d4a0556f6a26f0627fe57e9e7411ff91f421fb502"}, + {file = "ruff-0.0.249-py3-none-win32.whl", hash = "sha256:f4837a7e6d1ff81cb027695deb28793e0945cca8d88e87b46ff845ef38d52c82"}, + {file = "ruff-0.0.249-py3-none-win_amd64.whl", hash = "sha256:4cc437ab55a35088008dbe9db598cd8e240b5f70fb88eb8ab6fa1de529007f30"}, + {file = "ruff-0.0.249-py3-none-win_arm64.whl", hash = "sha256:3d2d11a7b750433f3acec30641faab673d101aa86a2ddfe4af8bcfa773b178e2"}, + {file = "ruff-0.0.249.tar.gz", hash = "sha256:b590689f08ecef971c45555cbda6854cdf48f3828fc326802828e851b1a14b3d"}, +] + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, + {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "scikit-learn" +version = "1.2.2" +description = "A set of python modules for machine learning and data mining" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.2.2.tar.gz", hash = "sha256:8429aea30ec24e7a8c7ed8a3fa6213adf3814a6efbea09e16e0a0c71e1a1a3d7"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99cc01184e347de485bf253d19fcb3b1a3fb0ee4cea5ee3c43ec0cc429b6d29f"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e6e574db9914afcb4e11ade84fab084536a895ca60aadea3041e85b8ac963edb"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fe83b676f407f00afa388dd1fdd49e5c6612e551ed84f3b1b182858f09e987d"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2642baa0ad1e8f8188917423dd73994bf25429f8893ddbe115be3ca3183584"}, + {file = "scikit_learn-1.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ad66c3848c0a1ec13464b2a95d0a484fd5b02ce74268eaa7e0c697b904f31d6c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dfeaf8be72117eb61a164ea6fc8afb6dfe08c6f90365bde2dc16456e4bc8e45f"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:fe0aa1a7029ed3e1dcbf4a5bc675aa3b1bc468d9012ecf6c6f081251ca47f590"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:065e9673e24e0dc5113e2dd2b4ca30c9d8aa2fa90f4c0597241c93b63130d233"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf036ea7ef66115e0d49655f16febfa547886deba20149555a41d28f56fd6d3c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:8b0670d4224a3c2d596fd572fb4fa673b2a0ccfb07152688ebd2ea0b8c61025c"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9c710ff9f9936ba8a3b74a455ccf0dcf59b230caa1e9ba0223773c490cab1e51"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:2dd3ffd3950e3d6c0c0ef9033a9b9b32d910c61bd06cb8206303fb4514b88a49"}, + {file = 
"scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44b47a305190c28dd8dd73fc9445f802b6ea716669cfc22ab1eb97b335d238b1"}, + {file = "scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:953236889928d104c2ef14027539f5f2609a47ebf716b8cbe4437e85dce42744"}, + {file = "scikit_learn-1.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:7f69313884e8eb311460cc2f28676d5e400bd929841a2c8eb8742ae78ebf7c20"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8156db41e1c39c69aa2d8599ab7577af53e9e5e7a57b0504e116cc73c39138dd"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fe175ee1dab589d2e1033657c5b6bec92a8a3b69103e3dd361b58014729975c3"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d5312d9674bed14f73773d2acf15a3272639b981e60b72c9b190a0cffed5bad"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea061bf0283bf9a9f36ea3c5d3231ba2176221bbd430abd2603b1c3b2ed85c89"}, + {file = "scikit_learn-1.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:6477eed40dbce190f9f9e9d0d37e020815825b300121307942ec2110302b66a3"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.3.2" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=22.3.0)", "flake8 (>=3.8.2)", "matplotlib (>=3.1.3)", "mypy (>=0.961)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=5.3.1)", "pytest-cov (>=2.9.0)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = "1.9.3" +description = "Fundamental algorithms for scientific computing in Python" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"}, + {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"}, + {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"}, + {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"}, + {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"}, + {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"}, + {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<1.26.0" + +[package.extras] +dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] +test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "send2trash" +version = "1.8.0" +description = "Send file to trash natively under Mac OS X, Windows and Linux." 
+category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "Send2Trash-1.8.0-py3-none-any.whl", hash = "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08"}, + {file = "Send2Trash-1.8.0.tar.gz", hash = "sha256:d2c24762fd3759860a0aff155e45871447ea58d2be6bdd39b5c8f966a0c99c2d"}, +] + +[package.extras] +nativelib = ["pyobjc-framework-Cocoa", "pywin32"] +objc = ["pyobjc-framework-Cocoa"] +win32 = ["pywin32"] + +[[package]] +name = "sentence-transformers" +version = "2.2.2" +description = "Multilingual text embeddings" +category = "main" +optional = true +python-versions = ">=3.6.0" +files = [ + {file = "sentence-transformers-2.2.2.tar.gz", hash = "sha256:dbc60163b27de21076c9a30d24b5b7b6fa05141d68cf2553fa9a77bf79a29136"}, +] + +[package.dependencies] +huggingface-hub = ">=0.4.0" +nltk = "*" +numpy = "*" +scikit-learn = "*" +scipy = "*" +sentencepiece = "*" +torch = ">=1.6.0" +torchvision = "*" +tqdm = "*" +transformers = ">=4.6.0,<5.0.0" + +[[package]] +name = "sentencepiece" +version = "0.1.97" +description = "SentencePiece python wrapper" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "sentencepiece-0.1.97-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6f249c8f1852893be86eae66b19d522c5fb30bbad4fe2d1b07f06fdc86e1907e"}, + {file = "sentencepiece-0.1.97-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09e1bc53178de70c557a9ba4fece07364b4416ce3d36570726b3372b68aea135"}, + {file = "sentencepiece-0.1.97-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:667193c57fb48b238be7e3d7636cfc8da56cb5bac5559d8f0b647334e1175be8"}, + {file = "sentencepiece-0.1.97-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2780531985af79c6163f63d4f200fec8a28b70b6768d2c19f70d01568a4524e8"}, + {file = "sentencepiece-0.1.97-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:205050670c53ef9015e2a98cce3934bfbcf0aafaa14caa0c618dd5667bc217ee"}, + {file = "sentencepiece-0.1.97-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28b183dadef8e8b6b4645c1c20692d7be0a13ecc3ec1a07b3885c8905516675f"}, + {file = "sentencepiece-0.1.97-cp310-cp310-win32.whl", hash = "sha256:ee3c9dbd558d8d85bb1617087b86df6ea2b856a528669630ce6cedeb4353b823"}, + {file = "sentencepiece-0.1.97-cp310-cp310-win_amd64.whl", hash = "sha256:f7dc55379e2f7dee86537180283db2e5f8418c6825fdd2fe436c724eb5604c05"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ba1b4154f9144c5a7528b00aff5cffaa1a896a1c6ca53ca78b6e74cd2dae5244"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac3d90aee5581e55d029d124ac11b6ae2fbae0817863b664b2f2302e966ababb"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c27400f1ac46518a01c87cb7703650e4e48728649feb115d2e3f1102a946a42"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6e12a166eba75994ca749aadc4a5056b91b31405f805d6de6e8914cc9741c60"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-win32.whl", hash = "sha256:ed85dff5c0a9b3dd1a414c7e1119f2a19e863fc3f81da525bf7f885ebc883de0"}, + {file = "sentencepiece-0.1.97-cp36-cp36m-win_amd64.whl", hash = "sha256:91a19ab6f40ffbae6d6127119953d2c6a85e93d734953dbc8629fde0d21ace66"}, + {file = "sentencepiece-0.1.97-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bae580e4a35a9314ff49561ac7c06574fe6afc71b821ed6bb00534e571458156"}, + {file = 
"sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad7262e7530c683b186672b5dd0082f82719a50a500a8cfbc4bbd7cde5bff8c"}, + {file = "sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:620cee35279720016735a7c7103cddbd9b84fe5e2f098bd5e673834d69fee2b8"}, + {file = "sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93b921b59914c0ec6697e8c6d5e6b44d99d1298fb1a0af56980a79ade0540c19"}, + {file = "sentencepiece-0.1.97-cp37-cp37m-win32.whl", hash = "sha256:9b9a4c44a31d5f47616e9568dcf31e029b0bfa776e0a252c0b59247881598b09"}, + {file = "sentencepiece-0.1.97-cp37-cp37m-win_amd64.whl", hash = "sha256:f31533cdacced56219e239d3459a003ece35116920dd64b2309d4ad047b77644"}, + {file = "sentencepiece-0.1.97-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7d643c01d1cad13b9206a276bbe5bc1a468e3d7cf6a26bde7783f945277f859d"}, + {file = "sentencepiece-0.1.97-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:542f1985b1ee279a92bef7740ec0781452372028ce01e15aa88df3228b197ba3"}, + {file = "sentencepiece-0.1.97-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:93701da21fea906dd244bf88cdbe640385a89c45d3c1812b76dbadf8782cdbcd"}, + {file = "sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a51514047b964047b7fadb480d88a5e0f72c02f6ca1ba96258fbbc6e79274a94"}, + {file = "sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3ae2e9b7a5b6f2aa64ec9240b0c185dabe597d0e787dc4344acfbaef1ffe0b2"}, + {file = "sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:923ee4af16dbae1f2ab358ed09f8a0eb89e40a8198a8b343bf54181482342721"}, + {file = "sentencepiece-0.1.97-cp38-cp38-win32.whl", hash = "sha256:fa6f2b88850b5fae3a05053658824cf9f147c8e3c3b40eb64539a976c83d8a24"}, + {file = "sentencepiece-0.1.97-cp38-cp38-win_amd64.whl", hash = "sha256:5137ff0d0b1cc574751d178650ef800ff8d90bf21eb9f71e9567d4a0548940a5"}, + {file = "sentencepiece-0.1.97-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f92876271a10494671431ad955bff2d6f8ea59baaf957f5ae5946aff56dfcb90"}, + {file = "sentencepiece-0.1.97-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:35c227b6d55e473033db7e0ecc51b1e99e6ed7607cc08602fb5768132543c81d"}, + {file = "sentencepiece-0.1.97-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1706a8a8188f7b3d4b7922db9bb00c64c4e16ee68ab4caaae79f55b3e18748c7"}, + {file = "sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce61efc1862ccb18856c4aabbd930e13d5bfbb4b09b4f111081ac53a9dc62275"}, + {file = "sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a78c03800ef9f02d320e0159f5768b15357f3e9ebea545c9c4ba7928ba8ba254"}, + {file = "sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753b8088fd685ee787d9f54c84275ab347de558c7c4ebc6accb4c35bf7776f20"}, + {file = "sentencepiece-0.1.97-cp39-cp39-win32.whl", hash = "sha256:24306fd86031c17a1a6ae92671e76a350390a3140a65620bc2843dad7db24e2a"}, + {file = "sentencepiece-0.1.97-cp39-cp39-win_amd64.whl", hash = "sha256:c6641d0b7acec61fde5881ea6ebe098c169557ac9aa3bdabdf124eab5a5592bb"}, + {file = "sentencepiece-0.1.97.tar.gz", hash = "sha256:c901305e0a710bbcd296f66d79e96f744e6e175b29812bd5178318437d4e1f6c"}, +] + +[[package]] +name = "setuptools" +version = "67.6.0" +description = "Easily download, build, install, upgrade, and uninstall 
Python packages" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"}, + {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "smart-open" +version = "6.3.0" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +category = "main" +optional = true +python-versions = ">=3.6,<4.0" +files = [ + {file = "smart_open-6.3.0-py3-none-any.whl", hash = "sha256:b4c9ae193ad6d3e7add50944b86afa0d150bd821ab8ec21edb26d9a06b66f6a8"}, + {file = "smart_open-6.3.0.tar.gz", hash = "sha256:d5238825fe9a9340645fac3d75b287c08fbb99fb2b422477de781c9f5f09e019"}, +] + +[package.extras] +all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests"] +azure = ["azure-common", "azure-core", "azure-storage-blob"] +gcs = ["google-cloud-storage (>=2.6.0)"] +http = ["requests"] +s3 = ["boto3"] +ssh = ["paramiko"] +test = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "paramiko", "pytest", "pytest-rerunfailures", "requests", "responses"] +webhdfs = ["requests"] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + +[[package]] +name = "soupsieve" +version = "2.4" +description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "soupsieve-2.4-py3-none-any.whl", hash = "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955"}, + {file = "soupsieve-2.4.tar.gz", hash = "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"}, +] + +[[package]] +name = "spacy" +version = "3.5.1" +description = "Industrial-strength Natural Language Processing (NLP) in Python" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "spacy-3.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:428925faf26a7c7a9564431d7a505af7816b22b5c68b240bbe073ae928e9ef36"}, + {file = "spacy-3.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:63bc19f4b5fa5f806698e7d16828cacbfefd0ab44f770e0b2a1a0509dd07f6f9"}, + {file = "spacy-3.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f5a1073cc7bb9896624682f6a5ab29c2d3d2d935cb36f88b25cbb01f12b57ef"}, + {file = "spacy-3.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb9af95d1c06e23e89731d61f3fa5f28583684e10bd3d29d9e7bb161ffe02df9"}, + {file = "spacy-3.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:dec30afd4916cb4f02449ccec94e2f8a3eb929686e9f96bd74f51f4c07d75577"}, + {file = "spacy-3.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9d2e256c44241b9a2ac3204659891d332d370dfa0e39917254574bc1ffdfb079"}, + {file = "spacy-3.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d337054213f837ae295431a35638bb469c4e4796f6c5ff17d2dd18d545615a0e"}, + {file = "spacy-3.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab9bbd8e34bfabd506f74d2739c6a4e47c899fd7d3f1648bbffde0c16b8a339d"}, + {file = "spacy-3.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ab0e2b406b3953c5975adcc4ac09bdc8fbcb20dd9a2a8ea2774b4d83106c24"}, + {file = "spacy-3.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9cbec19e55fcdb6e4be220c6b6335af96c374a7ac76dffb15f9da95c9d39ce62"}, + {file = "spacy-3.5.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92b590c1c50eb421b6aaa0373b37fbdfb290a130771728e8d06159517cc120d"}, + {file = "spacy-3.5.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2525bc1ec9e784597365daf245f65b9ca9fd8a25fa96f9c7a6b7bfd5048b87bc"}, + {file = "spacy-3.5.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e3f113cbf4331052622ec5c27e581751beba5c62e9af2f21d2798db50a41e04c"}, + {file = "spacy-3.5.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c9e93851a210ccc59112243fc74dcac82191383e7654731c2842326f7d1eb1d"}, + {file = "spacy-3.5.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ca30de7f82ab97e054a457eeb424060091b609114ebf7c90ef1775cac40fe04"}, + {file = "spacy-3.5.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3083ccbcc52102bf53ff797233ea90a7d2b01c3853d811272ebc63de0aff4df5"}, + {file = "spacy-3.5.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:1e795b3f85f229ea54ff7f91e15fb5d7afacec5e5fca302dca1bc3224547e4f0"}, + {file = "spacy-3.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fa47b47142883891252dda54da7a79055cb4e703914a90928c2fbe5bd058f4ed"}, + {file = "spacy-3.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d92387989fe9c3bebd60faaeb590206e34ca9c421a52460a058ee5050d9fc8c6"}, + {file = "spacy-3.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1067f7ef0e87341cea2c3187f9b96965f4b0c076a87e22c1aac45ea5586f856"}, + {file = "spacy-3.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab781021e896aae4a0f9f0a5424c75fc5d6ef4c20f56fd115e8605484567fd6"}, + {file = "spacy-3.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:c43b2597649549e84ceda7b658479e28c6e66995ebd9a61e0193b0c0dceffe50"}, + {file = "spacy-3.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9a389f850ab1a3f17e6beb90fd92533bad21a372979496b01a99ae1a9f3e96e3"}, + {file = "spacy-3.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af8ca295e8381a0273b6543c1389275af98878a43ab70c781630277e49ce978f"}, + {file = "spacy-3.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62a458c88c296234471fe540fe5d1ec763701d2f556870512143de8559286c0"}, + {file = "spacy-3.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04ad29a306d1879cafe23e4e8a613046f62d81ceeb70e6fcab3fddb4b0fedf7f"}, + {file = "spacy-3.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c4be3508c9b4109afe3e5c7fdf91b9d7153ec2227f24270625caee96651fa9e2"}, + {file = "spacy-3.5.1.tar.gz", hash = "sha256:811ae1468c58b97fc9aa31187d6b55317784258f0a47ebf69d81cab639e3fa15"}, +] + +[package.dependencies] +catalogue = ">=2.0.6,<2.1.0" +cymem = ">=2.0.2,<2.1.0" +jinja2 = "*" +langcodes = ">=3.2.0,<4.0.0" +murmurhash = ">=0.28.0,<1.1.0" +numpy = ">=1.15.0" +packaging = ">=20.0" +pathy = ">=0.10.0" +preshed = ">=3.0.2,<3.1.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.11.0" +requests = ">=2.13.0,<3.0.0" +setuptools = "*" +smart-open = ">=5.2.1,<7.0.0" +spacy-legacy = ">=3.0.11,<3.1.0" +spacy-loggers = ">=1.0.0,<2.0.0" +srsly = ">=2.4.3,<3.0.0" +thinc = ">=8.1.8,<8.2.0" +tqdm = ">=4.38.0,<5.0.0" +typer = ">=0.3.0,<0.8.0" +wasabi = ">=0.9.1,<1.2.0" + +[package.extras] +apple = ["thinc-apple-ops (>=0.1.0.dev0,<1.0.0)"] +cuda = ["cupy (>=5.0.0b4,<12.0.0)"] +cuda-autodetect = ["cupy-wheel (>=11.0.0,<12.0.0)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4,<12.0.0)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4,<12.0.0)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4,<12.0.0)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4,<12.0.0)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4,<12.0.0)"] +cuda112 = ["cupy-cuda112 (>=5.0.0b4,<12.0.0)"] +cuda113 = ["cupy-cuda113 (>=5.0.0b4,<12.0.0)"] +cuda114 = ["cupy-cuda114 (>=5.0.0b4,<12.0.0)"] +cuda115 = ["cupy-cuda115 (>=5.0.0b4,<12.0.0)"] +cuda116 = ["cupy-cuda116 (>=5.0.0b4,<12.0.0)"] +cuda117 = ["cupy-cuda117 (>=5.0.0b4,<12.0.0)"] +cuda11x = ["cupy-cuda11x (>=11.0.0,<12.0.0)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4,<12.0.0)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4,<12.0.0)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4,<12.0.0)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4,<12.0.0)"] +ja = ["sudachidict-core (>=20211220)", "sudachipy (>=0.5.2,!=0.6.1)"] +ko = ["natto-py (>=0.9.0)"] +lookups = ["spacy-lookups-data (>=1.0.3,<1.1.0)"] +ray = ["spacy-ray (>=0.1.0,<1.0.0)"] +th = ["pythainlp (>=2.0)"] +transformers = ["spacy-transformers (>=1.1.2,<1.3.0)"] + +[[package]] +name = "spacy-legacy" +version = "3.0.12" +description = "Legacy registered 
functions for spaCy backwards compatibility" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774"}, + {file = "spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f"}, +] + +[[package]] +name = "spacy-loggers" +version = "1.0.4" +description = "Logging utilities for SpaCy" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "spacy-loggers-1.0.4.tar.gz", hash = "sha256:e6f983bf71230091d5bb7b11bf64bd54415eca839108d5f83d9155d0ba93bf28"}, + {file = "spacy_loggers-1.0.4-py3-none-any.whl", hash = "sha256:e050bf2e63208b2f096b777e494971c962ad7c1dc997641c8f95c622550044ae"}, +] + +[[package]] +name = "sphinx" +version = "4.5.0" +description = "Python documentation generator" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "Sphinx-4.5.0-py3-none-any.whl", hash = "sha256:ebf612653238bcc8f4359627a9b7ce44ede6fdd75d9d30f68255c7383d3a6226"}, + {file = "Sphinx-4.5.0.tar.gz", hash = "sha256:7bf8ca9637a4ee15af412d1a1d9689fec70523a68ca9bb9127c2f3eeb344e2e6"}, +] + +[package.dependencies] +alabaster = ">=0.7,<0.8" +babel = ">=1.3" +colorama = {version = ">=0.3.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.14,<0.18" +imagesize = "*" +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} +Jinja2 = ">=2.3" +packaging = "*" +Pygments = ">=2.0" +requests = ">=2.5.0" +snowballstemmer = ">=1.1" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.5" + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["docutils-stubs", "flake8 (>=3.5.0)", "isort", "mypy (>=0.931)", "types-requests", "types-typed-ast"] +test = ["cython", "html5lib", "pytest", "pytest-cov", "typed-ast"] + +[[package]] +name = "sphinx-autobuild" +version = "2021.3.14" +description = "Rebuild Sphinx documentation on changes, with live-reload in the browser." 
+category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "sphinx-autobuild-2021.3.14.tar.gz", hash = "sha256:de1ca3b66e271d2b5b5140c35034c89e47f263f2cd5db302c9217065f7443f05"}, + {file = "sphinx_autobuild-2021.3.14-py3-none-any.whl", hash = "sha256:8fe8cbfdb75db04475232f05187c776f46f6e9e04cacf1e49ce81bdac649ccac"}, +] + +[package.dependencies] +colorama = "*" +livereload = "*" +sphinx = "*" + +[package.extras] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "sphinx-book-theme" +version = "0.3.3" +description = "A clean book theme for scientific explanations and documentation with Sphinx" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sphinx_book_theme-0.3.3-py3-none-any.whl", hash = "sha256:9685959dbbb492af005165ef1b9229fdd5d5431580ac181578beae3b4d012d91"}, + {file = "sphinx_book_theme-0.3.3.tar.gz", hash = "sha256:0ec36208ff14c6d6bf8aee1f1f8268e0c6e2bfa3cef6e41143312b25275a6217"}, +] + +[package.dependencies] +pydata-sphinx-theme = ">=0.8.0,<0.9.0" +pyyaml = "*" +sphinx = ">=3,<5" + +[package.extras] +code-style = ["pre-commit (>=2.7.0,<2.8.0)"] +doc = ["ablog (>=0.10.13,<0.11.0)", "folium", "ipywidgets", "matplotlib", "myst-nb (>=0.13.2,<0.14.0)", "nbclient", "numpy", "numpydoc", "pandas", "plotly", "sphinx (>=4.0,<5.0)", "sphinx-copybutton", "sphinx-design", "sphinx-examples", "sphinx-tabs", "sphinx-thebe (>=0.1.1)", "sphinx-togglebutton (>=0.2.1)", "sphinxcontrib-bibtex (>=2.2,<3.0)", "sphinxcontrib-youtube", "sphinxext-opengraph"] +test = ["beautifulsoup4 (>=4.6.1,<5)", "coverage", "myst-nb (>=0.13.2,<0.14.0)", "pytest (>=6.0.1,<6.1.0)", "pytest-cov", "pytest-regressions (>=2.0.1,<2.1.0)", "sphinx_thebe"] + +[[package]] +name = "sphinx-copybutton" +version = "0.5.1" +description = "Add a copy button to each of your code cells." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sphinx-copybutton-0.5.1.tar.gz", hash = "sha256:366251e28a6f6041514bfb5439425210418d6c750e98d3a695b73e56866a677a"}, + {file = "sphinx_copybutton-0.5.1-py3-none-any.whl", hash = "sha256:0842851b5955087a7ec7fc870b622cb168618ad408dee42692e9a5c97d071da8"}, +] + +[package.dependencies] +sphinx = ">=1.8" + +[package.extras] +code-style = ["pre-commit (==2.12.1)"] +rtd = ["ipython", "myst-nb", "sphinx", "sphinx-book-theme", "sphinx-examples"] + +[[package]] +name = "sphinx-panels" +version = "0.6.0" +description = "A sphinx extension for creating panels in a grid layout." 
+category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "sphinx-panels-0.6.0.tar.gz", hash = "sha256:d36dcd26358117e11888f7143db4ac2301ebe90873ac00627bf1fe526bf0f058"}, + {file = "sphinx_panels-0.6.0-py3-none-any.whl", hash = "sha256:bd64afaf85c07f8096d21c8247fc6fd757e339d1be97832c8832d6ae5ed2e61d"}, +] + +[package.dependencies] +docutils = "*" +sphinx = ">=2,<5" + +[package.extras] +code-style = ["pre-commit (>=2.7.0,<2.8.0)"] +live-dev = ["sphinx-autobuild", "web-compile (>=0.2.0,<0.3.0)"] +testing = ["pytest (>=6.0.1,<6.1.0)", "pytest-regressions (>=2.0.1,<2.1.0)"] +themes = ["myst-parser (>=0.12.9,<0.13.0)", "pydata-sphinx-theme (>=0.4.0,<0.5.0)", "sphinx-book-theme (>=0.0.36,<0.1.0)", "sphinx-rtd-theme"] + +[[package]] +name = "sphinx-rtd-theme" +version = "1.2.0" +description = "Read the Docs theme for Sphinx" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "sphinx_rtd_theme-1.2.0-py2.py3-none-any.whl", hash = "sha256:f823f7e71890abe0ac6aaa6013361ea2696fc8d3e1fa798f463e82bdb77eeff2"}, + {file = "sphinx_rtd_theme-1.2.0.tar.gz", hash = "sha256:a0d8bd1a2ed52e0b338cbe19c4b2eef3c5e7a048769753dac6a9f059c7b641b8"}, +] + +[package.dependencies] +docutils = "<0.19" +sphinx = ">=1.6,<7" +sphinxcontrib-jquery = {version = ">=2.0.0,<3.0.0 || >3.0.0", markers = "python_version > \"3\""} + +[package.extras] +dev = ["bump2version", "sphinxcontrib-httpdomain", "transifex-client", "wheel"] + +[[package]] +name = "sphinx-typlog-theme" +version = "0.8.0" +description = "A typlog Sphinx theme" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "sphinx_typlog_theme-0.8.0-py2.py3-none-any.whl", hash = "sha256:b0ab728ab31d071523af0229bcb6427a13493958b3fc2bb7db381520fab77de4"}, + {file = "sphinx_typlog_theme-0.8.0.tar.gz", hash = "sha256:61dbf97b1fde441bd03a5409874571e229898b67fb3080400837b8f4cee46659"}, +] + +[package.extras] +dev = ["livereload", "sphinx"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.4" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e"}, + {file = "sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.2" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document." 
+category = "dev" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"}, + {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.1" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff"}, + {file = "sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jquery" +version = "4.1" +description = "Extension to include jQuery on newer Sphinx releases" +category = "dev" +optional = false +python-versions = ">=2.7" +files = [ + {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, + {file = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"}, +] + +[package.dependencies] +Sphinx = ">=1.8" + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +category = "dev" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.3" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document." +category = "dev" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"}, + {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.5" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)." 
+category = "dev" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"}, + {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sqlalchemy" +version = "1.4.47" +description = "Database Abstraction Library" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "SQLAlchemy-1.4.47-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:dcfb480bfc9e1fab726003ae00a6bfc67a29bad275b63a4e36d17fe7f13a624e"}, + {file = "SQLAlchemy-1.4.47-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:28fda5a69d6182589892422c5a9b02a8fd1125787aab1d83f1392aa955bf8d0a"}, + {file = "SQLAlchemy-1.4.47-cp27-cp27m-win32.whl", hash = "sha256:45e799c1a41822eba6bee4e59b0e38764e1a1ee69873ab2889079865e9ea0e23"}, + {file = "SQLAlchemy-1.4.47-cp27-cp27m-win_amd64.whl", hash = "sha256:10edbb92a9ef611f01b086e271a9f6c1c3e5157c3b0c5ff62310fb2187acbd4a"}, + {file = "SQLAlchemy-1.4.47-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7a4df53472c9030a8ddb1cce517757ba38a7a25699bbcabd57dcc8a5d53f324e"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:511d4abc823152dec49461209607bbfb2df60033c8c88a3f7c93293b8ecbb13d"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbe57f39f531c5d68d5594ea4613daa60aba33bb51a8cc42f96f17bbd6305e8d"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca8ab6748e3ec66afccd8b23ec2f92787a58d5353ce9624dccd770427ee67c82"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299b5c5c060b9fbe51808d0d40d8475f7b3873317640b9b7617c7f988cf59fda"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-win32.whl", hash = "sha256:684e5c773222781775c7f77231f412633d8af22493bf35b7fa1029fdf8066d10"}, + {file = "SQLAlchemy-1.4.47-cp310-cp310-win_amd64.whl", hash = "sha256:2bba39b12b879c7b35cde18b6e14119c5f1a16bd064a48dd2ac62d21366a5e17"}, + {file = "SQLAlchemy-1.4.47-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:795b5b9db573d3ed61fae74285d57d396829e3157642794d3a8f72ec2a5c719b"}, + {file = "SQLAlchemy-1.4.47-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:989c62b96596b7938cbc032e39431e6c2d81b635034571d6a43a13920852fb65"}, + {file = "SQLAlchemy-1.4.47-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3b67bda733da1dcdccaf354e71ef01b46db483a4f6236450d3f9a61efdba35a"}, + {file = "SQLAlchemy-1.4.47-cp311-cp311-win32.whl", hash = "sha256:9a198f690ac12a3a807e03a5a45df6a30cd215935f237a46f4248faed62e69c8"}, + {file = "SQLAlchemy-1.4.47-cp311-cp311-win_amd64.whl", hash = "sha256:03be6f3cb66e69fb3a09b5ea89d77e4bc942f3bf84b207dba84666a26799c166"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:16ee6fea316790980779268da47a9260d5dd665c96f225d28e7750b0bb2e2a04"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:557675e0befafa08d36d7a9284e8761c97490a248474d778373fb96b0d7fd8de"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:bb2797fee8a7914fb2c3dc7de404d3f96eb77f20fc60e9ee38dc6b0ca720f2c2"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28297aa29e035f29cba6b16aacd3680fbc6a9db682258d5f2e7b49ec215dbe40"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-win32.whl", hash = "sha256:998e782c8d9fd57fa8704d149ccd52acf03db30d7dd76f467fd21c1c21b414fa"}, + {file = "SQLAlchemy-1.4.47-cp36-cp36m-win_amd64.whl", hash = "sha256:dde4d02213f1deb49eaaf8be8a6425948963a7af84983b3f22772c63826944de"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e98ef1babe34f37f443b7211cd3ee004d9577a19766e2dbacf62fce73c76245a"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14a3879853208a242b5913f3a17c6ac0eae9dc210ff99c8f10b19d4a1ed8ed9b"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7120a2f72599d4fed7c001fa1cbbc5b4d14929436135768050e284f53e9fbe5e"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:048509d7f3ac27b83ad82fd96a1ab90a34c8e906e4e09c8d677fc531d12c23c5"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-win32.whl", hash = "sha256:6572d7c96c2e3e126d0bb27bfb1d7e2a195b68d951fcc64c146b94f088e5421a"}, + {file = "SQLAlchemy-1.4.47-cp37-cp37m-win_amd64.whl", hash = "sha256:a6c3929df5eeaf3867724003d5c19fed3f0c290f3edc7911616616684f200ecf"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:71d4bf7768169c4502f6c2b0709a02a33703544f611810fb0c75406a9c576ee1"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd45c60cc4f6d68c30d5179e2c2c8098f7112983532897566bb69c47d87127d3"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0fdbb8e9d4e9003f332a93d6a37bca48ba8095086c97a89826a136d8eddfc455"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f216a51451a0a0466e082e163591f6dcb2f9ec182adb3f1f4b1fd3688c7582c"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-win32.whl", hash = "sha256:bd988b3362d7e586ef581eb14771bbb48793a4edb6fcf62da75d3f0f3447060b"}, + {file = "SQLAlchemy-1.4.47-cp38-cp38-win_amd64.whl", hash = "sha256:32ab09f2863e3de51529aa84ff0e4fe89a2cb1bfbc11e225b6dbc60814e44c94"}, + {file = "SQLAlchemy-1.4.47-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:07764b240645627bc3e82596435bd1a1884646bfc0721642d24c26b12f1df194"}, + {file = "SQLAlchemy-1.4.47-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e2a42017984099ef6f56438a6b898ce0538f6fadddaa902870c5aa3e1d82583"}, + {file = "SQLAlchemy-1.4.47-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6b6d807c76c20b4bc143a49ad47782228a2ac98bdcdcb069da54280e138847fc"}, + {file = "SQLAlchemy-1.4.47-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a94632ba26a666e7be0a7d7cc3f7acab622a04259a3aa0ee50ff6d44ba9df0d"}, + {file = 
"SQLAlchemy-1.4.47-cp39-cp39-win32.whl", hash = "sha256:f80915681ea9001f19b65aee715115f2ad310730c8043127cf3e19b3009892dd"}, + {file = "SQLAlchemy-1.4.47-cp39-cp39-win_amd64.whl", hash = "sha256:fc700b862e0a859a37faf85367e205e7acaecae5a098794aff52fdd8aea77b12"}, + {file = "SQLAlchemy-1.4.47.tar.gz", hash = "sha256:95fc02f7fc1f3199aaa47a8a757437134cf618e9d994c84effd53f530c38586f"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""} + +[package.extras] +aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] +mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)", "cx-oracle (>=7,<8)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql", "pymysql (<1)"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "sqlitedict" +version = "2.1.0" +description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c"}, +] + +[[package]] +name = "srsly" +version = "2.4.6" +description = "Modern high-performance serialization utilities for Python" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "srsly-2.4.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9b96569976420be2ac3716db9ac05b06bf4cd7a358953879ba34f03c9533c123"}, + {file = "srsly-2.4.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a9833c0a870e17c67a9452ed107b3ec033fa5b7addead51af5977fdafd32452"}, + {file = "srsly-2.4.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0355d57e89382bc0852d30b000f1d04f0bf1da2a739f60f0427a00b6ea1cd146"}, + {file = "srsly-2.4.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2261ef76f6b8d4029b9d2fc4a65ac505a760d2ea1de0132fc4b423883f7df52e"}, + {file = "srsly-2.4.6-cp310-cp310-win_amd64.whl", hash = "sha256:02ab79c59e4b0eba4ba44d64b4aeccb5df1379270f3970dc7e30f1eef6cd3851"}, + {file = "srsly-2.4.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:73acd407c66fa943bbaa8d473c30ea548b31ba4079b51483eda65df94910b61f"}, + {file = "srsly-2.4.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99131465fea74aa5e80dbba6effad10ae661bee2c3fbc1fd6da8a1e954e031d0"}, + {file = "srsly-2.4.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a0152f766930aa41f45bf571b7f6e99206a02810d964cc7bcbd81685e3b624"}, + {file = "srsly-2.4.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9742e5f4205c5484cea925ff24b1bd850f1e9291bd0ada6dfe1ec2b715e732b5"}, + {file = "srsly-2.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:73ce7c532fecbd8d7ab946fd2b5fa1d767d554526e330e55d7704bcf522c9f66"}, + {file = "srsly-2.4.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5c5074628249385640f4fe4ac237fd93631a023938476ea258139d12abb17f9"}, + {file = "srsly-2.4.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12b9e6d5a87c64e1d4a4a43fd6c94f98b5c48076c569804072e5fe45f1703c32"}, + {file = "srsly-2.4.6-cp36-cp36m-win_amd64.whl", hash = "sha256:bac2b2fa1f315c8a50e7807002a064e892be21c95735334f39d2ec104c00a8f0"}, + {file = "srsly-2.4.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3a6811eb797101b549fece201c03ba794ed731e9e2d58b81ea56eb3219ed2c8e"}, + {file = "srsly-2.4.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03477c648b76571a5ab0723423fc03ada74e747c4354357feef92c098853246f"}, + {file = "srsly-2.4.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb1426313af7c560c728fbe8c3cc8e7cc443f5aa489b04a26adc73645214b91"}, + {file = "srsly-2.4.6-cp37-cp37m-win_amd64.whl", hash = "sha256:f1fb1ca8e2415bfd9ce1e3d8612dbbd85dd06c574a0a96a0223265c382950b5a"}, + {file = "srsly-2.4.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:02b0708878f6a1e344284ae7c65b36a9ad8178eeff71583cd212d2d379f0e2ce"}, + {file = "srsly-2.4.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:720715be0efb9646ab64850185ecd22fe6ace93027d02f6367bdc8842450b369"}, + {file = "srsly-2.4.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1da8ac70f994644069451b4ab5fe5d2649218871409ab89f8421e79b0eace76b"}, + {file = "srsly-2.4.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7dc1c3877618d67a44ec74830510cd72d54fcfb32339388f2c6cbd559d27d20e"}, + {file = "srsly-2.4.6-cp38-cp38-win_amd64.whl", hash = "sha256:0ca1b1065edeca0cbc4a75ef15e915189bfd4b87c8256d542ec662168dd17627"}, + {file = "srsly-2.4.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0522d9aeaf58c6d58ee0cec247653a460545422d3266b2d970df7af1530f3dcc"}, + {file = "srsly-2.4.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52e3a0a760fb7723c74e566d0c064da78e5707d65d8f69b1d3c2e05b72e3efb2"}, + {file = "srsly-2.4.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da8d393ac59cba12b92c27c53550417200601d0f2a9aa50c1559cf5ce9cb9473"}, + {file = "srsly-2.4.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e5725f18a76971fc00e788a254bc2da6e119d69d491a811a6d387de77b72ca2"}, + {file = "srsly-2.4.6-cp39-cp39-win_amd64.whl", hash = "sha256:52a3b4d2949d9b7623b459054526bc3df04cbd9a8243c4786f13e3c956faf251"}, + {file = "srsly-2.4.6.tar.gz", hash = "sha256:47b41f323aba4c9c3311abf60e443c03a9efe9c69f65dc402d173c32f7744a6f"}, +] + +[package.dependencies] +catalogue = ">=2.0.3,<2.1.0" + +[[package]] +name = "stack-data" +version = "0.6.2" +description = "Extract data from python stack frames and tracebacks for informative displays" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "stack_data-0.6.2-py3-none-any.whl", hash = "sha256:cbb2a53eb64e5785878201a97ed7c7b94883f48b87bfb0bbe8b623c74679e4a8"}, + {file = "stack_data-0.6.2.tar.gz", hash = "sha256:32d2dd0376772d01b6cb9fc996f3c8b57a357089dec328ed4b6553d037eaf815"}, +] + +[package.dependencies] +asttokens = ">=2.1.0" +executing = ">=1.2.0" +pure-eval = "*" + +[package.extras] +tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tenacity" +version = "8.2.2" +description = "Retry code until it succeeds" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tenacity-8.2.2-py3-none-any.whl", hash = "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0"}, + {file = "tenacity-8.2.2.tar.gz", hash = "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "tensorboard" +version = "2.11.2" +description = "TensorBoard lets you watch Tensors Flow" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tensorboard-2.11.2-py3-none-any.whl", hash = "sha256:cbaa2210c375f3af1509f8571360a19ccc3ded1d9641533414874b5deca47e89"}, +] + +[package.dependencies] +absl-py = ">=0.4" +google-auth = ">=1.6.3,<3" +google-auth-oauthlib = ">=0.4.1,<0.5" +grpcio = ">=1.24.3" +markdown = ">=2.6.8" +numpy = ">=1.12.0" +protobuf = ">=3.9.2,<4" +requests = ">=2.21.0,<3" +setuptools = ">=41.0.0" +tensorboard-data-server = ">=0.6.0,<0.7.0" +tensorboard-plugin-wit = ">=1.6.0" +werkzeug = ">=1.0.1" +wheel = ">=0.26" + +[[package]] +name = "tensorboard-data-server" +version = "0.6.1" +description = "Fast data loading for TensorBoard" +category 
= "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "tensorboard_data_server-0.6.1-py3-none-any.whl", hash = "sha256:809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7"}, + {file = "tensorboard_data_server-0.6.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fa8cef9be4fcae2f2363c88176638baf2da19c5ec90addb49b1cde05c95c88ee"}, + {file = "tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl", hash = "sha256:d8237580755e58eff68d1f3abefb5b1e39ae5c8b127cc40920f9c4fb33f4b98a"}, +] + +[[package]] +name = "tensorboard-plugin-wit" +version = "1.8.1" +description = "What-If Tool TensorBoard plugin." +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"}, +] + +[[package]] +name = "tensorflow" +version = "2.11.1" +description = "TensorFlow is an open source machine learning framework for everyone." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tensorflow-2.11.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ac0e46c5de7985def49e4f688a0ca4180949a4d5dc62b89e9c6640db3c3982ba"}, + {file = "tensorflow-2.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45b1669c523fa6dc240688bffe79f08dfbb76bf5e23a7fe10e722ba658637a44"}, + {file = "tensorflow-2.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a96595e0c068d54717405fa12f36b4a5bb0a9fc53fb9065155a92cff944b35b"}, + {file = "tensorflow-2.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:13197f18f31a52d3f2eac28743d1b06abb8efd86017f184110a1b16841b745b1"}, + {file = "tensorflow-2.11.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9f030f1bc9e7763fa03ec5738323c42021ababcd562fe861b3a3f41e9ff10e43"}, + {file = "tensorflow-2.11.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f12855c1e8373c1327650061fd6a9a3d3772e1bac8241202ea8ccb56213d005"}, + {file = "tensorflow-2.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cd4279cb500074a8ab28af116af7f060f0b015651bef552769d51e55d6fd5c"}, + {file = "tensorflow-2.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:f5a2f75f28cd5fb615a5306f2091eac7da3a8fff949ab8804ec06b8e3682f837"}, + {file = "tensorflow-2.11.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:ea93246ad6c90ff0422f06a82164836fe8098989a8a65c3b02c720eadbe15dde"}, + {file = "tensorflow-2.11.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ba6b3c2f68037e965a19427a1f2a5f0351b7ceae6c686938a8485b08e1e1f3"}, + {file = "tensorflow-2.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ddd5c61f68d8125c985370de96a24a80aee5e3f1604efacec7e1c34ca72de24"}, + {file = "tensorflow-2.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7d8834df3f72d7eab56bc2f34f2e52b82d705776b80b36bf5470b7538c9865c"}, +] + +[package.dependencies] +absl-py = ">=1.0.0" +astunparse = ">=1.6.0" +flatbuffers = ">=2.0" +gast = ">=0.2.1,<=0.4.0" +google-pasta = ">=0.1.1" +grpcio = ">=1.24.3,<2.0" +h5py = ">=2.9.0" +keras = ">=2.11.0,<2.12" +libclang = ">=13.0.0" +numpy = ">=1.20" +opt-einsum = ">=2.3.2" +packaging = "*" +protobuf = ">=3.9.2,<3.20" +setuptools = "*" +six = ">=1.12.0" +tensorboard = ">=2.11,<2.12" +tensorflow-estimator = ">=2.11.0,<2.12" +tensorflow-io-gcs-filesystem = {version = ">=0.23.1", markers = "platform_machine != \"arm64\" or platform_system != \"Darwin\""} +termcolor = 
">=1.1.0" +typing-extensions = ">=3.6.6" +wrapt = ">=1.11.0" + +[[package]] +name = "tensorflow-estimator" +version = "2.11.0" +description = "TensorFlow Estimator." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tensorflow_estimator-2.11.0-py2.py3-none-any.whl", hash = "sha256:ea3b64acfff3d9a244f06178c9bdedcbdd3f125b67d0888dba8229498d06468b"}, +] + +[[package]] +name = "tensorflow-hub" +version = "0.13.0" +description = "TensorFlow Hub is a library to foster the publication, discovery, and consumption of reusable parts of machine learning models." +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "tensorflow_hub-0.13.0-py2.py3-none-any.whl", hash = "sha256:3544f4fd9fd99e4eeb6da1b5b5320e4a2dbdef7f9bb778f66f76d6790f32dd65"}, +] + +[package.dependencies] +numpy = ">=1.12.0" +protobuf = ">=3.19.6" + +[package.extras] +make-image-classifier = ["keras-preprocessing[image]"] +make-nearest-neighbour-index = ["annoy", "apache-beam"] + +[[package]] +name = "tensorflow-io-gcs-filesystem" +version = "0.31.0" +description = "TensorFlow IO" +category = "main" +optional = true +python-versions = ">=3.7, <3.12" +files = [ + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:a71421f8d75a093b6aac65b4c8c8d2f768c3ca6215307cf8c16192e62d992bcf"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:359134ecbd3bf938bb0cf65be4526106c30da461b2e2ce05446a229ed35f6832"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b658b33567552f155af2ed848130f787bfda29381fa78cd905d5ee8254364f3c"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp310-cp310-win_amd64.whl", hash = "sha256:961353b38c76471fa296bb7d883322c66b91415e7d47087236a6706db3ab2758"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:8909c4344b0e96aa356230ab460ffafe5900c33c1aaced65fafae71d177a1966"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e417faf8755aafe52d8f8c6b5ae5bae6e4fae8326ee3acd5e9181b83bbfbae87"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c40e3c4ee1f8dda3b545deea6b8839192c82037d8021db9f589908034ad975"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-win_amd64.whl", hash = "sha256:4bb37d23f21c434687b11059cb7ffd094d52a7813368915ba1b7057e3c16e414"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:a7e8d4bd0a25de7637e562997c011294d7ea595a76f315427a5dd522d56e9d49"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fbcfb4aa2eaa9a3038d2487e570ff93feb1dbe51c3a4663d7d9ab9f9a9f9a9d8"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e3933059b1c53e062075de2e355ec136b655da5883c3c26736c45dfeb1901945"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f0adfbcd264262797d429311843733da2d5c1ffb119fbfa6339269b6c0414113"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:20e3ee5df01f2bd81d37fc715816c329b7533ccca967c47946eb458a5b7a7280"}, + {file = 
"tensorflow_io_gcs_filesystem-0.31.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd628609b77aee0e385eadf1628222486f19b8f1d81b5f0a344f2470204df116"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp38-cp38-win_amd64.whl", hash = "sha256:b4ebb30ad7ce5f3769e3d959ea99bd95d80a44099bcf94da6042f9755ac6e850"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:68b89ef9f63f297de1cd9d545bc45dddc7d8fe12bcda4266279b244e8cf3b7c0"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e6d8cc7b14ade870168b9704ee44f9c55b468b9a00ed40e12d20fffd321193b5"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97ebb9a8001a38f615aa1f90d2e998b7bd6eddae7aafc92897833610b039401b"}, + {file = "tensorflow_io_gcs_filesystem-0.31.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb7459c15608fe42973a78e4d3ad7ac79cfc7adae1ccb1b1846db3165fbc081a"}, +] + +[package.extras] +tensorflow = ["tensorflow (>=2.11.0,<2.12.0)"] +tensorflow-aarch64 = ["tensorflow-aarch64 (>=2.11.0,<2.12.0)"] +tensorflow-cpu = ["tensorflow-cpu (>=2.11.0,<2.12.0)"] +tensorflow-gpu = ["tensorflow-gpu (>=2.11.0,<2.12.0)"] +tensorflow-rocm = ["tensorflow-rocm (>=2.11.0,<2.12.0)"] + +[[package]] +name = "tensorflow-macos" +version = "2.11.0" +description = "TensorFlow is an open source machine learning framework for everyone." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tensorflow_macos-2.11.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0bdbd1bb564d01bd735d6d11451f0658c3dd8187369ee9dd3ed6de6bbdd6df53"}, + {file = "tensorflow_macos-2.11.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:66eb67915cf418eddd3b4c158132609efd50895fa09fd55e4b2f14a3ab85bd34"}, + {file = "tensorflow_macos-2.11.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:6810731e2c8353123f6c9c944d2765b58a2226e7eb9fec1e360f73977c6c6aa4"}, + {file = "tensorflow_macos-2.11.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:881b36d97b67d24197250a091c52c31db14aecfdbf1ac20418a148ec37321978"}, + {file = "tensorflow_macos-2.11.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8d56b0d0bd140008b0cc4877804c9c310e1e2735444fa99bc7c88ffb2909153d"}, + {file = "tensorflow_macos-2.11.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:db97cd91b905bd01069069f07325a2a291705222eb4914148b9574090a5815ae"}, +] + +[package.dependencies] +absl-py = ">=1.0.0" +astunparse = ">=1.6.0" +flatbuffers = ">=2.0" +gast = ">=0.2.1,<=0.4.0" +google-pasta = ">=0.1.1" +grpcio = ">=1.24.3,<2.0" +h5py = ">=2.9.0" +keras = ">=2.11.0,<2.12" +libclang = ">=13.0.0" +numpy = ">=1.20" +opt-einsum = ">=2.3.2" +packaging = "*" +protobuf = ">=3.9.2,<3.20" +setuptools = "*" +six = ">=1.12.0" +tensorboard = ">=2.11,<2.12" +tensorflow-estimator = ">=2.11.0,<2.12" +termcolor = ">=1.1.0" +typing-extensions = ">=3.6.6" +wrapt = ">=1.11.0" + +[[package]] +name = "tensorflow-text" +version = "2.11.0" +description = "TF.Text is a TensorFlow library of text related ops, modules, and subgraphs." 
+category = "main" +optional = true +python-versions = "*" +files = [ + {file = "tensorflow_text-2.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9d4797e331da37419f2b19159fbc0f125ed60467340e9a209ab8f8d65856704"}, + {file = "tensorflow_text-2.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4abede4191820ae6d5a7c74f02c335a5f2e2df174eaa38b481b2b82a3330152"}, + {file = "tensorflow_text-2.11.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:49194f85e03a2e3f017ac8e0e3d3927104fa20e6e883b43087cff032fe2cbe14"}, + {file = "tensorflow_text-2.11.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3ea14efeb1d627ed5098e791e95bb98ee6f9f928f9eda785205e184cc20b428"}, + {file = "tensorflow_text-2.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a207ceea4c71a932c35e4d208d7b8c3edc65a5ba0eebfdc9233fc8da546625c9"}, + {file = "tensorflow_text-2.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:506fbea82a1ec566d7d0f771adad589c44727d904311103169466d88236ec2c8"}, + {file = "tensorflow_text-2.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cf0033bf47872b57d46f78d7058db5676f396a9327fa4d063a2c73cce43586ae"}, + {file = "tensorflow_text-2.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56693df33461ab0e7f32549010ca38a8d01291fd67142e0396d0aeb9fcad2e09"}, +] + +[package.dependencies] +tensorflow = {version = ">=2.11.0,<2.12", markers = "platform_machine != \"arm64\" or platform_system != \"Darwin\""} +tensorflow-hub = ">=0.8.0" +tensorflow-macos = {version = ">=2.11.0,<2.12", markers = "platform_machine == \"arm64\" and platform_system == \"Darwin\""} + +[package.extras] +tensorflow-cpu = ["tensorflow-cpu (>=2.11.0,<2.12)"] +tests = ["absl-py", "pytest", "tensorflow-datasets (>=3.2.0)"] + +[[package]] +name = "termcolor" +version = "2.2.0" +description = "ANSI color formatting for output in terminal" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "termcolor-2.2.0-py3-none-any.whl", hash = "sha256:91ddd848e7251200eac969846cbae2dacd7d71c2871e92733289e7e3666f48e7"}, + {file = "termcolor-2.2.0.tar.gz", hash = "sha256:dfc8ac3f350788f23b2947b3e6cfa5a53b630b612e6cd8965a015a776020b99a"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + +[[package]] +name = "terminado" +version = "0.17.1" +description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." 
+category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "terminado-0.17.1-py3-none-any.whl", hash = "sha256:8650d44334eba354dd591129ca3124a6ba42c3d5b70df5051b6921d506fdaeae"}, + {file = "terminado-0.17.1.tar.gz", hash = "sha256:6ccbbcd3a4f8a25a5ec04991f39a0b8db52dfcd487ea0e578d977e6752380333"}, +] + +[package.dependencies] +ptyprocess = {version = "*", markers = "os_name != \"nt\""} +pywinpty = {version = ">=1.1.0", markers = "os_name == \"nt\""} +tornado = ">=6.1.0" + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] + +[[package]] +name = "thinc" +version = "8.1.9" +description = "A refreshing functional take on deep learning, compatible with your favorite libraries" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "thinc-8.1.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3956a0d80e718bfd9df29e0c476f615880359f07fc02ad7c62bca1fde562f310"}, + {file = "thinc-8.1.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:896431606aa68b2b92201e9aaac57f77fa3a2a5c46f17de47b3f0293c22e5364"}, + {file = "thinc-8.1.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44c5c9dfb4838b147d838f4186cca7397b16dfaf96251a3b2e0032521c6b7ea"}, + {file = "thinc-8.1.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1631156be2472b1db2a7af193fd6c22c3371a1ded259e2e12690a13e9b87b704"}, + {file = "thinc-8.1.9-cp310-cp310-win_amd64.whl", hash = "sha256:76dead937164fa67a5abd5c0309ab5636d2db10552258bdcfd47143de08f0b29"}, + {file = "thinc-8.1.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a009eb97cd92ef5fe02c3696975d072583a994fe56291ccff80c4d62191065c6"}, + {file = "thinc-8.1.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5be1c72f5cf83102640cba9e438209dcda339c06b741fb03f6a7b7741e537c98"}, + {file = "thinc-8.1.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dca9fddc12532503b4b1af5eb3f419154e5ef7425a0d55dd30c8c4655469016e"}, + {file = "thinc-8.1.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b5fbf8093ae87a338eeeb932180a69cb4ed5a39a7185c686fcc6f9c619e95ee"}, + {file = "thinc-8.1.9-cp311-cp311-win_amd64.whl", hash = "sha256:ce3ad68023d33bac7feb3eda64a4aae3de39abe9ebc8ef8d2e7a28c4b8598086"}, + {file = "thinc-8.1.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d77d0ff7bdd6b3f164d66fa6c764b1f6bee90348133a303da62beb8a1f8071"}, + {file = "thinc-8.1.9-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb86ffcbc00a1866163d22e881da98b5c7ff8697f6bbaa1a3ded9c549beab227"}, + {file = "thinc-8.1.9-cp36-cp36m-win_amd64.whl", hash = "sha256:4f7888e7c0667d110a2c99ba62002260fd5d0c58f6a522043fa07fb2bb590d80"}, + {file = "thinc-8.1.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cf4e786c79266680b5ec91b9bdb4ca60e957c657f60cc852963d64b912f581"}, + {file = "thinc-8.1.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:978a9a7b6a36b73d5c104ff16be146f5f0b3eb24d86d1a6f020d484944e134cc"}, + {file = "thinc-8.1.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:041b872021b9eae52b8426079cea6c78d493f3832d01f50f0416d663de9dad0c"}, + {file = "thinc-8.1.9-cp37-cp37m-win_amd64.whl", hash = "sha256:97378f9f8a8ca2b3332a213433a5290200cc0b219fb4ba80d8c5f6e44a2cd14e"}, + {file = "thinc-8.1.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:8d70a50f350ceaf9328a5cfe41e7a9b19620e476bab862527abe050e01b8f84d"}, + {file = "thinc-8.1.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e1cab98b8b4d5ed3d35d28d0b20322140bd71cca4063081cb15bcde29677ccc8"}, + {file = "thinc-8.1.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d293b0e5bbdc6b7047486499d70136a473b2c8040a4ac15016eb12b77f63d201"}, + {file = "thinc-8.1.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:008a6f9d07b74c31ada9f4a5cfe6d1d74090b8a292781ada11ce16b766815dfe"}, + {file = "thinc-8.1.9-cp38-cp38-win_amd64.whl", hash = "sha256:ed540675b905c527d3596ecf070fd96213eba5f1f5c9c465e1e69b1c0bdee5cc"}, + {file = "thinc-8.1.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21ef468e9c11ed6f1da8b3388c0daac8f07de8a668395390569fb90bae25c33c"}, + {file = "thinc-8.1.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ccaa668f7b303326fc610c9153de6e014a4f36cc93304a3c8705b64c294059"}, + {file = "thinc-8.1.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81514d51e269cc0ff80527359dba4d38b7e7eba33f92e93c0e11319ae40c2857"}, + {file = "thinc-8.1.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:487e391a9bc949812ce76f4ede6b0d8d715b03a30349f81ed9e3dde4cd84a500"}, + {file = "thinc-8.1.9-cp39-cp39-win_amd64.whl", hash = "sha256:86a9da4121e81aee3b14c4265fac4e05cd1c8a8bcba962dc7f7ccb118f215eb6"}, + {file = "thinc-8.1.9.tar.gz", hash = "sha256:8a1e65529c6d0796271d2a7e5ca6ea013fcb7dad69ec609d5093a25808107f51"}, +] + +[package.dependencies] +blis = ">=0.7.8,<0.8.0" +catalogue = ">=2.0.4,<2.1.0" +confection = ">=0.0.1,<1.0.0" +cymem = ">=2.0.2,<2.1.0" +murmurhash = ">=1.0.2,<1.1.0" +numpy = ">=1.15.0" +packaging = ">=20.0" +preshed = ">=3.0.2,<3.1.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.11.0" +setuptools = "*" +srsly = ">=2.4.0,<3.0.0" +wasabi = ">=0.8.1,<1.2.0" + +[package.extras] +cuda = ["cupy (>=5.0.0b4)"] +cuda-autodetect = ["cupy-wheel (>=11.0.0)"] +cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] +cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] +cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] +cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] +cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] +cuda112 = ["cupy-cuda112 (>=5.0.0b4)"] +cuda113 = ["cupy-cuda113 (>=5.0.0b4)"] +cuda114 = ["cupy-cuda114 (>=5.0.0b4)"] +cuda115 = ["cupy-cuda115 (>=5.0.0b4)"] +cuda116 = ["cupy-cuda116 (>=5.0.0b4)"] +cuda117 = ["cupy-cuda117 (>=5.0.0b4)"] +cuda11x = ["cupy-cuda11x (>=11.0.0)"] +cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] +cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] +cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] +cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] +datasets = ["ml-datasets (>=0.2.0,<0.3.0)"] +mxnet = ["mxnet (>=1.5.1,<1.6.0)"] +tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] +torch = ["torch (>=1.6.0)"] + +[[package]] +name = "threadpoolctl" +version = "3.1.0" +description = "threadpoolctl" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, + {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, +] + +[[package]] +name = "tiktoken" +version = "0.3.2" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" +optional = true +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:90e6c209421ffc42e309703b5cbcfe3e56fccc3ff31ab4b10e70ff9041268c7b"}, + {file = "tiktoken-0.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6fffa2e0baf80608eb05f961f5a577272e6d5dc630907eca8fed7781d726fbb3"}, + {file = "tiktoken-0.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:093b3d81adc8e7669b66478a2f68ac609f2c96cc598bdeca58e476677b90b71c"}, + {file = "tiktoken-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c05147ec16ecc7cddea4298b69a8e4da171639fe057b2d88f52ebdae5a47b001"}, + {file = "tiktoken-0.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:39ac56147d61ff31cda9853197b0a99cb9c44d63bd53de8fdaad724707b378fb"}, + {file = "tiktoken-0.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b0de454c05f19c9d32258194aaf890bf152007b2da8b4b249a10d140c59158cf"}, + {file = "tiktoken-0.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:40f3f807f5efbeaeb1ef302265954a4f0e8fd00d370ecb5c7560da5a99e98eff"}, + {file = "tiktoken-0.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:05942949039fa9347dab08ff6e179912b332ba75539ab17916d52334ff7951f3"}, + {file = "tiktoken-0.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fbb048a1d01c8b88bbaba882e6d990ae76e61afc25babc84bb38494bd88f6e3"}, + {file = "tiktoken-0.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c283730a986fcfee6141196a52a75883e49e45355e2bf3009e9fc530bee798ce"}, + {file = "tiktoken-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f577e0ee8fe9d95a8223ffd88c4cdc2617f4b964a306d3a8303fa151a0b9930b"}, + {file = "tiktoken-0.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1e00bcd088e5c1edf0f86df24097d23dc05dbdafc8a4f5f572ef9051f6a98627"}, + {file = "tiktoken-0.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:162fddfa2e8166c42e91cb4cfda0712a4e17311258f865e9cc83c63736ce07fe"}, + {file = "tiktoken-0.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:2f81ed783bbdf00a0646e02f29cfcfccb21ad355bdbb6ed8e67f89dc6e203255"}, + {file = "tiktoken-0.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9714e292eab5892094ab4448b904898c6a1098041922fe103e5f3692658d6a46"}, + {file = "tiktoken-0.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5f65b7646bbeeee42f6d65928d6ac72ac275525f4b72ecaf8b660eae96fc90a5"}, + {file = "tiktoken-0.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7839e211a438b74e84ae572b609aad4a149cc7a3092988664fd537519c29036"}, + {file = "tiktoken-0.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63a636f7cfd3eefa9fb3367782a5c9bde1a00b4717931ad31725894ab4b5c74e"}, + {file = "tiktoken-0.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a7ab927d64451b76084fe5276a0a5ea939eed8f2424b92cca0a1888578200f00"}, + {file = "tiktoken-0.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8a93400c84852acde066d66acb89a1ef1f3ce55dbf5f7bbcb70464d4e822c35a"}, + {file = "tiktoken-0.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:7654feff0a6b90907016416a91312dc93d19122d9de92d66a7b6ad2308fb6679"}, + {file = "tiktoken-0.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e56506517aa0d2a57ffd6c1089908776f13404d435e92cc3a21fd031e364eeee"}, + {file = "tiktoken-0.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ecbb54133c232de82715a2a07ae0874fb5d9ea084e0b8034971b7d4cf9260546"}, + {file = "tiktoken-0.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:55dbb736cd4fcc8f3a78e8c2c53e5ce02f45c706291e60e8663a982e00ad7198"}, + {file = "tiktoken-0.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d76be63a9c10ddca548383d443846e54fe004df544a390424110bf5ec89a2e8f"}, + {file = "tiktoken-0.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9530978b499af8b77291087782673994a32cf80d4bd408c5d928d91631fcede1"}, + {file = "tiktoken-0.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cff38728823adf05d9600cf494d3ccf87d0087286675d5baed8286481da85696"}, + {file = "tiktoken-0.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:2d113493c1e7b6db24b194c19992da7bdb1574237a02f8289147e9a69907b6c9"}, + {file = "tiktoken-0.3.2.tar.gz", hash = "sha256:a51b5449e883e409cf2f4a846a6a97962d5656a354a5532c330811c833ac3b37"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + +[[package]] +name = "tinycss2" +version = "1.2.1" +description = "A tiny CSS parser" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tinycss2-1.2.1-py3-none-any.whl", hash = "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847"}, + {file = "tinycss2-1.2.1.tar.gz", hash = "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627"}, +] + +[package.dependencies] +webencodings = ">=0.4" + +[package.extras] +doc = ["sphinx", "sphinx_rtd_theme"] +test = ["flake8", "isort", "pytest"] + +[[package]] +name = "tokenizers" +version = "0.13.2" +description = "Fast and Customizable Tokenizers" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "tokenizers-0.13.2-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:a6f36b1b499233bb4443b5e57e20630c5e02fba61109632f5e00dab970440157"}, + {file = "tokenizers-0.13.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:bc6983282ee74d638a4b6d149e5dadd8bc7ff1d0d6de663d69f099e0c6bddbeb"}, + {file = "tokenizers-0.13.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16756e6ab264b162f99c0c0a8d3d521328f428b33374c5ee161c0ebec42bf3c0"}, + {file = "tokenizers-0.13.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b10db6e4b036c78212c6763cb56411566edcf2668c910baa1939afd50095ce48"}, + {file = "tokenizers-0.13.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:238e879d1a0f4fddc2ce5b2d00f219125df08f8532e5f1f2ba9ad42f02b7da59"}, + {file = "tokenizers-0.13.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47ef745dbf9f49281e900e9e72915356d69de3a4e4d8a475bda26bfdb5047736"}, + {file = "tokenizers-0.13.2-cp310-cp310-win32.whl", hash = "sha256:96cedf83864bcc15a3ffd088a6f81a8a8f55b8b188eabd7a7f2a4469477036df"}, + {file = "tokenizers-0.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:eda77de40a0262690c666134baf19ec5c4f5b8bde213055911d9f5a718c506e1"}, + {file = "tokenizers-0.13.2-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:9eee037bb5aa14daeb56b4c39956164b2bebbe6ab4ca7779d88aa16b79bd4e17"}, + {file = "tokenizers-0.13.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d1b079c4c9332048fec4cb9c2055c2373c74fbb336716a5524c9a720206d787e"}, + {file = "tokenizers-0.13.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a689654fc745135cce4eea3b15e29c372c3e0b01717c6978b563de5c38af9811"}, + {file = "tokenizers-0.13.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:3606528c07cda0566cff6cbfbda2b167f923661be595feac95701ffcdcbdbb21"}, + {file = "tokenizers-0.13.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41291d0160946084cbd53c8ec3d029df3dc2af2673d46b25ff1a7f31a9d55d51"}, + {file = "tokenizers-0.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7892325f9ca1cc5fca0333d5bfd96a19044ce9b092ce2df625652109a3de16b8"}, + {file = "tokenizers-0.13.2-cp311-cp311-win32.whl", hash = "sha256:93714958d4ebe5362d3de7a6bd73dc86c36b5af5941ebef6c325ac900fa58865"}, + {file = "tokenizers-0.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:fa7ef7ee380b1f49211bbcfac8a006b1a3fa2fa4c7f4ee134ae384eb4ea5e453"}, + {file = "tokenizers-0.13.2-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:da521bfa94df6a08a6254bb8214ea04854bb9044d61063ae2529361688b5440a"}, + {file = "tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a739d4d973d422e1073989769723f3b6ad8b11e59e635a63de99aea4b2208188"}, + {file = "tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cac01fc0b868e4d0a3aa7c5c53396da0a0a63136e81475d32fcf5c348fcb2866"}, + {file = "tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0901a5c6538d2d2dc752c6b4bde7dab170fddce559ec75662cfad03b3187c8f6"}, + {file = "tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ba9baa76b5a3eefa78b6cc351315a216232fd727ee5e3ce0f7c6885d9fb531b"}, + {file = "tokenizers-0.13.2-cp37-cp37m-win32.whl", hash = "sha256:a537061ee18ba104b7f3daa735060c39db3a22c8a9595845c55b6c01d36c5e87"}, + {file = "tokenizers-0.13.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c82fb87b1cbfa984d8f05b2b3c3c73e428b216c1d4f0e286d0a3b27f521b32eb"}, + {file = "tokenizers-0.13.2-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:ce298605a833ac7f81b8062d3102a42dcd9fa890493e8f756112c346339fe5c5"}, + {file = "tokenizers-0.13.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:f44d59bafe3d61e8a56b9e0a963075187c0f0091023120b13fbe37a87936f171"}, + {file = "tokenizers-0.13.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a51b93932daba12ed07060935978a6779593a59709deab04a0d10e6fd5c29e60"}, + {file = "tokenizers-0.13.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6969e5ea7ccb909ce7d6d4dfd009115dc72799b0362a2ea353267168667408c4"}, + {file = "tokenizers-0.13.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:92f040c4d938ea64683526b45dfc81c580e3b35aaebe847e7eec374961231734"}, + {file = "tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3bc9f7d7f4c1aa84bb6b8d642a60272c8a2c987669e9bb0ac26daf0c6a9fc8"}, + {file = "tokenizers-0.13.2-cp38-cp38-win32.whl", hash = "sha256:efbf189fb9cf29bd29e98c0437bdb9809f9de686a1e6c10e0b954410e9ca2142"}, + {file = "tokenizers-0.13.2-cp38-cp38-win_amd64.whl", hash = "sha256:0b4cb2c60c094f31ea652f6cf9f349aae815f9243b860610c29a69ed0d7a88f8"}, + {file = "tokenizers-0.13.2-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:b47d6212e7dd05784d7330b3b1e5a170809fa30e2b333ca5c93fba1463dec2b7"}, + {file = "tokenizers-0.13.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:80a57501b61ec4f94fb7ce109e2b4a1a090352618efde87253b4ede6d458b605"}, + {file = "tokenizers-0.13.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61507a9953f6e7dc3c972cbc57ba94c80c8f7f686fbc0876afe70ea2b8cc8b04"}, + {file = 
"tokenizers-0.13.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c09f4fa620e879debdd1ec299bb81e3c961fd8f64f0e460e64df0818d29d845c"}, + {file = "tokenizers-0.13.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:66c892d85385b202893ac6bc47b13390909e205280e5df89a41086cfec76fedb"}, + {file = "tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e306b0941ad35087ae7083919a5c410a6b672be0343609d79a1171a364ce79"}, + {file = "tokenizers-0.13.2-cp39-cp39-win32.whl", hash = "sha256:79189e7f706c74dbc6b753450757af172240916d6a12ed4526af5cc6d3ceca26"}, + {file = "tokenizers-0.13.2-cp39-cp39-win_amd64.whl", hash = "sha256:486d637b413fddada845a10a45c74825d73d3725da42ccd8796ccd7a1c07a024"}, + {file = "tokenizers-0.13.2.tar.gz", hash = "sha256:f9525375582fd1912ac3caa2f727d36c86ff8c0c6de45ae1aaff90f87f33b907"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "torch" +version = "1.13.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "main" +optional = true +python-versions = ">=3.7.0" +files = [ + {file = "torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:fd12043868a34a8da7d490bf6db66991108b00ffbeecb034228bfcbbd4197143"}, + {file = "torch-1.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d9fe785d375f2e26a5d5eba5de91f89e6a3be5d11efb497e76705fdf93fa3c2e"}, + {file = "torch-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:98124598cdff4c287dbf50f53fb455f0c1e3a88022b39648102957f3445e9b76"}, + {file = "torch-1.13.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:393a6273c832e047581063fb74335ff50b4c566217019cc6ace318cd79eb0566"}, + {file = "torch-1.13.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:0122806b111b949d21fa1a5f9764d1fd2fcc4a47cb7f8ff914204fd4fc752ed5"}, + {file = "torch-1.13.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:22128502fd8f5b25ac1cd849ecb64a418382ae81dd4ce2b5cebaa09ab15b0d9b"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:76024be052b659ac1304ab8475ab03ea0a12124c3e7626282c9c86798ac7bc11"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:ea8dda84d796094eb8709df0fcd6b56dc20b58fdd6bc4e8d7109930dafc8e419"}, + {file = "torch-1.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2ee7b81e9c457252bddd7d3da66fb1f619a5d12c24d7074de91c4ddafb832c93"}, + {file = "torch-1.13.1-cp37-none-macosx_10_9_x86_64.whl", hash = 
"sha256:0d9b8061048cfb78e675b9d2ea8503bfe30db43d583599ae8626b1263a0c1380"}, + {file = "torch-1.13.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:f402ca80b66e9fbd661ed4287d7553f7f3899d9ab54bf5c67faada1555abde28"}, + {file = "torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:727dbf00e2cf858052364c0e2a496684b9cb5aa01dc8a8bc8bbb7c54502bdcdd"}, + {file = "torch-1.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:df8434b0695e9ceb8cc70650afc1310d8ba949e6db2a0525ddd9c3b2b181e5fe"}, + {file = "torch-1.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:5e1e722a41f52a3f26f0c4fcec227e02c6c42f7c094f32e49d4beef7d1e213ea"}, + {file = "torch-1.13.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:33e67eea526e0bbb9151263e65417a9ef2d8fa53cbe628e87310060c9dcfa312"}, + {file = "torch-1.13.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:eeeb204d30fd40af6a2d80879b46a7efbe3cf43cdbeb8838dd4f3d126cc90b2b"}, + {file = "torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:50ff5e76d70074f6653d191fe4f6a42fdbe0cf942fbe2a3af0b75eaa414ac038"}, + {file = "torch-1.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2c3581a3fd81eb1f0f22997cddffea569fea53bafa372b2c0471db373b26aafc"}, + {file = "torch-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0aa46f0ac95050c604bcf9ef71da9f1172e5037fdf2ebe051962d47b123848e7"}, + {file = "torch-1.13.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6930791efa8757cb6974af73d4996b6b50c592882a324b8fb0589c6a9ba2ddaf"}, + {file = "torch-1.13.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e0df902a7c7dd6c795698532ee5970ce898672625635d885eade9976e5a04949"}, +] + +[package.dependencies] +nvidia-cublas-cu11 = {version = "11.10.3.66", markers = "platform_system == \"Linux\""} +nvidia-cuda-nvrtc-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cuda-runtime-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cudnn-cu11 = {version = "8.5.0.96", markers = "platform_system == \"Linux\""} +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchvision" +version = "0.14.1" +description = "image and video datasets and models for torch deep learning" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "torchvision-0.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb05dd9dd3af5428fee525400759daf8da8e4caec45ddd6908cfb36571f6433"}, + {file = "torchvision-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8d0766ea92affa7af248e327dd85f7c9cfdf51a57530b43212d4e1858548e9d7"}, + {file = "torchvision-0.14.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:6d7b35653113664ea3fdcb71f515cfbf29d2fe393000fd8aaff27a1284de6908"}, + {file = "torchvision-0.14.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:8a9eb773a2fa8f516e404ac09c059fb14e6882c48fdbb9c946327d2ce5dba6cd"}, + {file = "torchvision-0.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:13986f0c15377ff23039e1401012ccb6ecf71024ce53def27139e4eac5a57592"}, + {file = "torchvision-0.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fb7a793fd33ce1abec24b42778419a3fb1e3159d7dfcb274a3ca8fb8cbc408dc"}, + {file = "torchvision-0.14.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:89fb0419780ec9a9eb9f7856a0149f6ac9f956b28f44b0c0080c6b5b48044db7"}, + {file = "torchvision-0.14.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:a2d4237d3c9705d7729eb4534e4eb06f1d6be7ff1df391204dfb51586d9b0ecb"}, + {file = "torchvision-0.14.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:92a324712a87957443cc34223274298ae9496853f115c252f8fc02b931f2340e"}, + {file = "torchvision-0.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:68ed03359dcd3da9cd21b8ab94da21158df8a6a0c5bad0bf4a42f0e448d28cb3"}, + {file = "torchvision-0.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:30fcf0e9fe57d4ac4ce6426659a57dce199637ccb6c70be1128670f177692624"}, + {file = "torchvision-0.14.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0ed02aefd09bf1114d35f1aa7dce55aa61c2c7e57f9aa02dce362860be654e85"}, + {file = "torchvision-0.14.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:a541e49fc3c4e90e49e6988428ab047415ed52ea97d0c0bfd147d8bacb8f4df8"}, + {file = "torchvision-0.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:6099b3191dc2516099a32ae38a5fb349b42e863872a13545ab1a524b6567be60"}, + {file = "torchvision-0.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c5e744f56e5f5b452deb5fc0f3f2ba4d2f00612d14d8da0dbefea8f09ac7690b"}, + {file = "torchvision-0.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:758b20d079e810b4740bd60d1eb16e49da830e3360f9be379eb177ee221fa5d4"}, + {file = "torchvision-0.14.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:83045507ef8d3c015d4df6be79491375b2f901352cfca6e72b4723e9c4f9a55d"}, + {file = "torchvision-0.14.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:eaed58cf454323ed9222d4e0dd5fb897064f454b400696e03a5200e65d3a1e76"}, + {file = "torchvision-0.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:b337e1245ca4353623dd563c03cd8f020c2496a7c5d12bba4d2e381999c766e0"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=5.3.0,<8.3.0 || >=8.4.0" +requests = "*" +torch = "1.13.1" +typing-extensions = "*" + +[package.extras] +scipy = ["scipy"] + +[[package]] +name = "tornado" +version = "6.2" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+category = "dev" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "tornado-6.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72"}, + {file = "tornado-6.2-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9"}, + {file = "tornado-6.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac"}, + {file = "tornado-6.2-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75"}, + {file = "tornado-6.2-cp37-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e"}, + {file = "tornado-6.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8"}, + {file = "tornado-6.2-cp37-abi3-musllinux_1_1_i686.whl", hash = "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b"}, + {file = "tornado-6.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca"}, + {file = "tornado-6.2-cp37-abi3-win32.whl", hash = "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23"}, + {file = "tornado-6.2-cp37-abi3-win_amd64.whl", hash = "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"}, + {file = "tornado-6.2.tar.gz", hash = "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13"}, +] + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "traitlets" +version = "5.9.0" +description = "Traitlets Python configuration system" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "traitlets-5.9.0-py3-none-any.whl", hash = "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8"}, + {file = "traitlets-5.9.0.tar.gz", hash = "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9"}, +] + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] + +[[package]] +name = "transformers" +version = "4.27.3" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +category = "main" +optional = true +python-versions = ">=3.7.0" +files = [ + {file = "transformers-4.27.3-py3-none-any.whl", hash = "sha256:d764c351b6d590952ac6104573721abe323b3eabd55e53bae0215c14750d5f13"}, + {file = "transformers-4.27.3.tar.gz", hash = "sha256:26bff783ad6ed463fe3f0954406e7c30a8313758b6488677827886e48e6bd5a9"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.11.0,<1.0" +numpy = ">=1.17" 
+packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.10.0)"] +all = ["Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.12)", "tensorflow-text", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.7,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.10.0)", "deepspeed (>=0.6.5)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.10.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.6.5)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.4,<2.12)", "tensorflow-text", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.7,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.4,<2.12)", "tensorflow-text", "tf2onnx", "timeout-decorator", "tokenizers 
(>=0.11.1,!=0.11.3,<0.14)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.7,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +docs = ["Pillow", "accelerate (>=0.10.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1)", "hf-doc-builder", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.12)", "tensorflow-text", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.7,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "optax (>=0.0.8)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.4)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf (<=3.20.2)", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.4,<2.12)", "tensorflow-text", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu 
(>=2.4,<2.12)", "tensorflow-text", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +torch = ["torch (>=1.7,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.11.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.2)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.7,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow"] + +[[package]] +name = "typer" +version = "0.7.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "typer-0.7.0-py3-none-any.whl", hash = "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d"}, + {file = "typer-0.7.0.tar.gz", hash = "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + +[[package]] +name = "types-pyopenssl" +version = "23.1.0.0" +description = "Typing stubs for pyOpenSSL" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-pyOpenSSL-23.1.0.0.tar.gz", hash = "sha256:acc153718bff497e8f6ca3beecb5ea7a3087c796e40d569fded8bafbfca73605"}, + {file = "types_pyOpenSSL-23.1.0.0-py3-none-any.whl", hash = "sha256:9dacec020a3484ef5e4ea4bd9d403a981765b80821d5a40b790b2ba2f09d58db"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" + +[[package]] +name = "types-pyyaml" +version = "6.0.12.8" +description = "Typing stubs for PyYAML" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.8.tar.gz", hash = "sha256:19304869a89d49af00be681e7b267414df213f4eb89634c4495fa62e8f942b9f"}, + {file = "types_PyYAML-6.0.12.8-py3-none-any.whl", hash = "sha256:5314a4b2580999b2ea06b2e5f9a7763d860d6e09cdf21c0e9561daa9cbd60178"}, +] + +[[package]] +name = "types-redis" +version = "4.5.3.0" +description = "Typing stubs for redis" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-redis-4.5.3.0.tar.gz", hash = "sha256:f23415e448ca25ec5028c24fdf3717a13f0c905eb1933733e8a8a7d4952f6908"}, + {file = "types_redis-4.5.3.0-py3-none-any.whl", hash = "sha256:7c1d5fdb0a2d5fd92eac37ce382fdb47d99a69889e7d6c2bc4479148ac646c73"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + +[[package]] +name = "types-requests" +version = "2.28.11.16" +description = "Typing stubs for requests" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = 
"types-requests-2.28.11.16.tar.gz", hash = "sha256:9d4002056df7ebc4ec1f28fd701fba82c5c22549c4477116cb2656aa30ace6db"}, + {file = "types_requests-2.28.11.16-py3-none-any.whl", hash = "sha256:a86921028335fdcc3aaf676c9d3463f867db6af2303fc65aa309b13ae1e6dd53"}, +] + +[package.dependencies] +types-urllib3 = "<1.27" + +[[package]] +name = "types-toml" +version = "0.10.8.5" +description = "Typing stubs for toml" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-toml-0.10.8.5.tar.gz", hash = "sha256:bf80fce7d2d74be91148f47b88d9ae5adeb1024abef22aa2fdbabc036d6b8b3c"}, + {file = "types_toml-0.10.8.5-py3-none-any.whl", hash = "sha256:2432017febe43174af0f3c65f03116e3d3cf43e7e1406b8200e106da8cf98992"}, +] + +[[package]] +name = "types-urllib3" +version = "1.26.25.9" +description = "Typing stubs for urllib3" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.9.tar.gz", hash = "sha256:160727879bdbe52f11f5feeca092a473f38d68ed3be88abb461b59cda40fb9bc"}, + {file = "types_urllib3-1.26.25.9-py3-none-any.whl", hash = "sha256:b327d360ba4a9edd80ea82f5990ba19e76175a20b5b64be4b4813d9a1c424caa"}, +] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] + +[[package]] +name = "typing-inspect" +version = "0.8.0" +description = "Runtime inspection utilities for typing module." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.8.0-py3-none-any.whl", hash = "sha256:5fbf9c1e65d4fa01e701fe12a5bca6c6e08a4ffd5bc60bfac028253a447c5188"}, + {file = "typing_inspect-0.8.0.tar.gz", hash = "sha256:8b1ff0c400943b6145df8119c41c244ca8207f1f10c9c057aeed1560e4806e3d"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "uri-template" +version = "1.2.0" +description = "RFC 6570 URI Template Processor" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "uri_template-1.2.0-py3-none-any.whl", hash = "sha256:f1699c77b73b925cf4937eae31ab282a86dc885c333f2e942513f08f691fc7db"}, + {file = "uri_template-1.2.0.tar.gz", hash = "sha256:934e4d09d108b70eb8a24410af8615294d09d279ce0e7cbcdaef1bd21f932b06"}, +] + +[package.extras] +dev = ["flake8 (<4.0.0)", "flake8-annotations", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-noqa", "flake8-requirements", "flake8-type-annotations", "flake8-use-fstring", "mypy", "pep8-naming"] + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] + +[[package]] +name = "urllib3" +version = "1.26.15" +description = "HTTP library with thread-safe connection pooling, file 
post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"}, + {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "validators" +version = "0.19.0" +description = "Python Data Validation for Humans™." +category = "main" +optional = true +python-versions = ">=3.4" +files = [ + {file = "validators-0.19.0.tar.gz", hash = "sha256:dec45f4381f042f1e705cfa74949505b77f1e27e8b05409096fee8152c839cbe"}, +] + +[package.dependencies] +decorator = ">=3.4.0" + +[package.extras] +test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] + +[[package]] +name = "wasabi" +version = "1.1.1" +description = "A lightweight console printing and formatting toolkit" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "wasabi-1.1.1-py3-none-any.whl", hash = "sha256:32e44649d99a64e08e40c1c96cddb69fad460bd0cc33802a53cab6714dfb73f8"}, + {file = "wasabi-1.1.1.tar.gz", hash = "sha256:f5ee7c609027811bd16e620f2fd7a7319466005848e41b051a62053ab8fd70d6"}, +] + +[package.dependencies] +colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\" and python_version >= \"3.7\""} + +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7"}, + {file = "watchdog-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8"}, + {file = "watchdog-3.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100"}, + {file = "watchdog-3.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346"}, + {file = "watchdog-3.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"}, + {file = "watchdog-3.0.0-py3-none-win32.whl", hash = "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f"}, + {file = "watchdog-3.0.0-py3-none-win_amd64.whl", hash = "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c"}, + {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, + {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[[package]] +name = "wcwidth" +version = "0.2.6" +description = "Measures the displayed width of unicode strings in a terminal" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.6-py2.py3-none-any.whl", hash = "sha256:795b138f6875577cd91bba52baf9e445cd5118fd32723b460e30a0af30ea230e"}, + {file = "wcwidth-0.2.6.tar.gz", hash = "sha256:a5220780a404dbe3353789870978e472cfe477761f06ee55077256e509b156d0"}, +] + +[[package]] +name = "weaviate-client" +version = "3.15.3" +description = "A python native weaviate client" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "weaviate-client-3.15.3.tar.gz", hash = "sha256:7ad0ef2d23c3c082e1beeb14719a93f6a2b090a534d4aa45d9d720fff6d58edc"}, + {file = "weaviate_client-3.15.3-py3-none-any.whl", hash = "sha256:f269caec18b45454913a575f3e8f136dc914bbe8b9b10b55919cc6ccc142a67c"}, +] + +[package.dependencies] +authlib = ">=1.1.0" +requests = ">=2.28.0,<2.29.0" +tqdm = ">=4.59.0,<5.0.0" +validators = ">=0.18.2,<0.20.0" + +[[package]] +name = "webcolors" +version = "1.12" +description = "A library for working with color names and color values formats defined by HTML and CSS." 
+category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "webcolors-1.12-py3-none-any.whl", hash = "sha256:d98743d81d498a2d3eaf165196e65481f0d2ea85281463d856b1e51b09f62dce"}, + {file = "webcolors-1.12.tar.gz", hash = "sha256:16d043d3a08fd6a1b1b7e3e9e62640d09790dce80d2bdd4792a175b35fe794a9"}, +] + +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] + +[[package]] +name = "websocket-client" +version = "1.5.1" +description = "WebSocket client for Python with low level API options" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "websocket-client-1.5.1.tar.gz", hash = "sha256:3f09e6d8230892547132177f575a4e3e73cfdf06526e20cc02aa1c3b47184d40"}, + {file = "websocket_client-1.5.1-py3-none-any.whl", hash = "sha256:cdf5877568b7e83aa7cf2244ab56a3213de587bbe0ce9d8b9600fc77b455d89e"}, +] + +[package.extras] +docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "werkzeug" +version = "2.2.3" +description = "The comprehensive WSGI web application library." +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"}, + {file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog"] + +[[package]] +name = "wheel" +version = "0.40.0" +description = "A built-package format for Python" +category = "main" +optional = true +python-versions = ">=3.7" +files = [ + {file = "wheel-0.40.0-py3-none-any.whl", hash = "sha256:d236b20e7cb522daf2390fa84c55eea81c5c30190f90f29ae2ca1ad8355bf247"}, + {file = "wheel-0.40.0.tar.gz", hash = "sha256:cd1196f3faee2b31968d626e1731c94f99cbdb67cf5a46e4f5656cbee7738873"}, +] + +[package.extras] +test = ["pytest (>=6.0.0)"] + +[[package]] +name = "widgetsnbextension" +version = "4.0.6" +description = "Jupyter interactive widgets for Jupyter Notebook" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "widgetsnbextension-4.0.6-py3-none-any.whl", hash = "sha256:7df2bffa274b0b416c1fa0789e321451858a9e276e1220b40a16cc994192e2b7"}, + {file = "widgetsnbextension-4.0.6.tar.gz", hash = "sha256:1a07d06c881a7c16ca7ab4541b476edbe2e404f5c5f0cf524ffa2406a8bd7c80"}, +] + +[[package]] +name = "wikipedia" +version = "1.4.0" +description = "Wikipedia API for Python" +category = "main" +optional = true +python-versions = "*" +files = [ + {file = "wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +requests = ">=2.0.0,<3.0.0" + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +category = "main" +optional = true +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = 
"sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[[package]] +name = "wolframalpha" +version = "5.0.0" +description = "Wolfram|Alpha 2.0 API client" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "wolframalpha-5.0.0-py3-none-any.whl", hash = "sha256:159f5d8fd31e4a734a34a9f3ae8aec4e9b2ef392607f82069b4a324b6b1831d5"}, + {file = "wolframalpha-5.0.0.tar.gz", hash = "sha256:38bf27654039ec85cc62c199dd319b6a4d6a7badfed7af1cd161f081afdb57c0"}, +] + +[package.dependencies] +"jaraco.context" = "*" +more-itertools = "*" +xmltodict = "*" + +[package.extras] +docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] +testing = ["keyring", "pmxbot", "pytest (>=3.5,!=3.7.3)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-enabler", "pytest-flake8", "pytest-mypy"] + +[[package]] +name = "wonderwords" +version = "2.2.0" +description = "A python package for random words and sentences in the english language" +category = "main" +optional = true +python-versions = ">=3.6" +files = [ + {file = "wonderwords-2.2.0-py3-none-any.whl", hash = "sha256:65fc665f1f5590e98f6d9259414ea036bf1b6dd83e51aa6ba44473c99ca92da1"}, + {file = "wonderwords-2.2.0.tar.gz", hash = "sha256:0b7ec6f591062afc55603bfea71463afbab06794b3064d9f7b04d0ce251a13d0"}, +] + +[package.extras] +cli = ["rich (==9.10.0)"] + +[[package]] +name = "wrapt" +version = "1.15.0" +description = "Module for decorators, wrappers and monkey patching." +category = "main" +optional = true +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ + {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, + {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, + {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, + {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, + {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, + {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, + {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = 
"sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, + {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, + {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, + {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, + {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, + {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, + {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, + 
{file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, + {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, + {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, + {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, + {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, + {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, + {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, +] + +[[package]] +name = "xmltodict" +version = "0.13.0" +description = "Makes working with XML feel like you are working with JSON" +category = "main" +optional = true +python-versions = ">=3.4" +files = [ + {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, + {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, +] + +[[package]] +name = "yarl" +version = "1.8.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, + {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, + {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, + {file = 
"yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, + {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, + {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, + {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, + {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, + {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, 
+ {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, + {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, + {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, + {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = 
"sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, + {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, + {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[extras] +all = ["aleph-alpha-client", "anthropic", "beautifulsoup4", "cohere", "deeplake", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "huggingface_hub", "jinja2", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pgvector", "pinecone-client", "psycopg2-binary", "pypdf", "qdrant-client", "redis", "sentence-transformers", "spacy", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "971401886ab18483c90be1cf1a8cd14d2a84295dc3c9b12d48920dcf4a99adcb" diff --git a/poetry.toml b/poetry.toml new file mode 100644 index 0000000000000000000000000000000000000000..ab1033bd37224ee84b5862fb25f094db73809b74 --- /dev/null +++ b/poetry.toml @@ -0,0 +1,2 @@ +[virtualenvs] +in-project = true diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..52b0b5d613d2682495511dbefaa3a13c3a8fe16e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,124 @@ +[tool.poetry] +name = "langchain" +version = "0.0.121" +description = "Building applications with LLMs through composability" +authors = [] +license = "MIT" +readme = "README.md" +repository = "https://www.github.com/hwchase17/langchain" + +[tool.poetry.scripts] +langchain-server = "langchain.server:main" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +pydantic = "^1" +SQLAlchemy = "^1" +requests = "^2" +PyYAML = ">=5.4.1" +numpy = "^1" +faiss-cpu = {version = "^1", optional = true} +wikipedia = {version = "^1", optional = true} +elasticsearch = {version = "^8", optional = true} +opensearch-py = {version = "^2.0.0", optional = true} +redis = {version = "^4", optional = true} +manifest-ml = {version = "^0.0.1", optional = true} +spacy = {version = "^3", optional = true} +nltk = {version = "^3", optional = true} +transformers = {version = "^4", optional = true} +beautifulsoup4 = {version = "^4", optional = true} +torch = {version = "^1", optional = true} +jinja2 = {version = "^3", optional = true} +tiktoken = {version = "^0.3.2", optional = true, python="^3.9"} +pinecone-client = {version = 
"^2", optional = true} +weaviate-client = {version = "^3", optional = true} +google-api-python-client = {version = "2.70.0", optional = true} +wolframalpha = {version = "5.0.0", optional = true} +anthropic = {version = "^0.2.2", optional = true} +qdrant-client = {version = "^1.0.4", optional = true, python = ">=3.8.1,<3.12"} +dataclasses-json = "^0.5.7" +tensorflow-text = {version = "^2.11.0", optional = true, python = "^3.10, <3.12"} +tenacity = "^8.1.0" +cohere = {version = "^3", optional = true} +openai = {version = "^0", optional = true} +nlpcloud = {version = "^1", optional = true} +nomic = {version = "^1.0.43", optional = true} +huggingface_hub = {version = "^0", optional = true} +google-search-results = {version = "^2", optional = true} +sentence-transformers = {version = "^2", optional = true} +aiohttp = "^3.8.3" +pypdf = {version = "^3.4.0", optional = true} +networkx = {version="^2.6.3", optional = true} +aleph-alpha-client = {version="^2.15.0", optional = true} +deeplake = {version = "^3.2.9", optional = true} +pgvector = {version = "^0.1.6", optional = true} +psycopg2-binary = {version = "^2.9.5", optional = true} + + +[tool.poetry.group.docs.dependencies] +autodoc_pydantic = "^1.8.0" +myst_parser = "^0.18.1" +nbsphinx = "^0.8.9" +sphinx = "^4.5.0" +sphinx-autobuild = "^2021.3.14" +sphinx_book_theme = "^0.3.3" +sphinx_rtd_theme = "^1.0.0" +sphinx-typlog-theme = "^0.8.0" +sphinx-panels = "^0.6.0" +toml = "^0.10.2" +myst-nb = "^0.17.1" +linkchecker = "^10.2.1" +sphinx-copybutton = "^0.5.1" + +[tool.poetry.group.test.dependencies] +pytest = "^7.2.0" +pytest-cov = "^4.0.0" +pytest-dotenv = "^0.5.2" +duckdb-engine = "^0.7.0" +pytest-watcher = "^0.2.6" +freezegun = "^1.2.2" +responses = "^0.22.0" +pytest-asyncio = "^0.20.3" + +[tool.poetry.group.lint.dependencies] +ruff = "^0.0.249" +types-toml = "^0.10.8.1" +types-redis = "^4.3.21.6" +black = "^23.1.0" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +types-pyyaml = "^6.0.12.2" +types-requests = "^2.28.11.5" + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +jupyter = "^1.0.0" +playwright = "^1.28.0" + +[tool.poetry.extras] +llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] +all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary"] + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort +] + +[tool.mypy] +ignore_missing_imports = "True" +disallow_untyped_defs = "True" +exclude = ["notebooks"] + +[tool.coverage.run] +omit = [ + "tests/*", +] + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..cc5b984ef9f9a5791920e3348392a92b4261dadc --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,11 @@ +version: 2 +sphinx: + configuration: docs/conf.py +formats: + - htmlzip +python: + version: 3.8 + install: + - requirements: docs/requirements.txt + - method: pip + path: . 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c64422947a5c87cd0352ee87b8cff9cb3d5f6040 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,104 @@ +aiohttp==3.8.4 +aiosignal==1.3.1 +appdirs==1.4.4 +async-timeout==4.0.2 +attrs==22.2.0 +beautifulsoup4==4.12.0 +blis==0.7.9 +bs4==0.0.1 +build==0.10.0 +CacheControl==0.12.11 +catalogue==2.0.8 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==3.1.0 +cleo==2.0.1 +click==8.1.3 +confection==0.0.4 +crashtest==0.4.1 +cymem==2.0.7 +distlib==0.3.6 +docker-pycreds==0.4.0 +dulwich==0.21.3 +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl +filelock==3.10.6 +Flask==2.2.3 +Flask-Cors==3.0.10 +frozenlist==1.3.3 +gitdb==4.0.10 +GitPython==3.1.31 +gunicorn==20.1.0 +html5lib==1.1 +idna==3.4 +importlib-metadata==6.1.0 +installer==0.7.0 +itsdangerous==2.1.2 +jaraco.classes==3.2.3 +Jinja2==3.1.2 +jsonschema==4.17.3 +keyring==23.13.1 +langcodes==3.3.0 +lockfile==0.12.2 +MarkupSafe==2.1.2 +more-itertools==9.1.0 +msgpack==1.0.5 +multidict==6.0.4 +murmurhash==1.0.9 +numpy==1.24.2 +openai==0.27.2 +packaging==23.0 +pandas==1.5.3 +pathtools==0.1.2 +pathy==0.10.1 +pexpect==4.8.0 +pkginfo==1.9.6 +platformdirs==2.6.2 +poetry==1.4.1 +poetry-core==1.5.2 +poetry-plugin-export==1.3.0 +preshed==3.0.8 +protobuf==4.22.1 +psutil==5.9.4 +psycopg2-binary==2.9.5 +ptyprocess==0.7.0 +pycparser==2.21 +pydantic==1.10.7 +pyphen==0.14.0 +pyproject_hooks==1.0.0 +pyrsistent==0.19.3 +python-dateutil==2.8.2 +pytz==2023.2 +PyYAML==6.0 +rapidfuzz==2.13.7 +requests==2.28.2 +requests-toolbelt==0.10.1 +sentry-sdk==1.17.0 +setproctitle==1.3.2 +shellingham==1.5.0.post1 +six==1.16.0 +smart-open==6.3.0 +smmap==5.0.0 +soupsieve==2.4 +spacy==3.5.1 +spacy-legacy==3.0.12 +spacy-loggers==1.0.4 +SQLAlchemy==2.0.7 +srsly==2.4.6 +tenacity==8.2.2 +textstat==0.7.3 +thinc==8.1.9 +tomli==2.0.1 +tomlkit==0.11.6 +tqdm==4.65.0 +trove-classifiers==2023.3.9 +typer==0.7.0 +typing_extensions==4.5.0 +urllib3==1.26.15 +virtualenv==20.21.0 +wandb==0.14.0 +wasabi==1.1.1 +webencodings==0.5.1 +Werkzeug==2.2.3 +xattr==0.10.1 +yarl==1.8.2 +zipp==3.15.0 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000000000000000000000000000000000000..c6f7782f61975eb26db83a0e0f32e62ec86d4b1e --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.9.13 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4c210e3375ddf817d5184500833d491e257c846a --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""All tests for this package.""" diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a076291f33f1fea15bac4cde813b6c74d0c07204 --- /dev/null +++ b/tests/integration_tests/__init__.py @@ -0,0 +1 @@ +"""All integration tests (tests that call out to an external API).""" diff --git a/tests/integration_tests/chains/__init__.py b/tests/integration_tests/chains/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ca2420123d3aa11eceecb620d0e948e129fa8d6 --- /dev/null +++ b/tests/integration_tests/chains/__init__.py @@ -0,0 +1 @@ +"""All integration tests for chains.""" diff --git a/tests/integration_tests/chains/test_memory.py b/tests/integration_tests/chains/test_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..af934de59a0d2ab9b585765b52596e372a53fd13 --- 
/dev/null +++ b/tests/integration_tests/chains/test_memory.py @@ -0,0 +1,31 @@ +"""Test memory functionality.""" +from langchain.memory.summary_buffer import ConversationSummaryBufferMemory +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_summary_buffer_memory_no_buffer_yet() -> None: + """Test ConversationSummaryBufferMemory when no inputs put in buffer yet.""" + memory = ConversationSummaryBufferMemory(llm=FakeLLM(), memory_key="baz") + output = memory.load_memory_variables({}) + assert output == {"baz": ""} + + +def test_summary_buffer_memory_buffer_only() -> None: + """Test ConversationSummaryBufferMemory when only buffer.""" + memory = ConversationSummaryBufferMemory(llm=FakeLLM(), memory_key="baz") + memory.save_context({"input": "bar"}, {"output": "foo"}) + assert memory.buffer == ["Human: bar\nAI: foo"] + output = memory.load_memory_variables({}) + assert output == {"baz": "Human: bar\nAI: foo"} + + +def test_summary_buffer_memory_summary() -> None: + """Test ConversationSummaryBufferMemory when the buffer is summarized.""" + memory = ConversationSummaryBufferMemory( + llm=FakeLLM(), memory_key="baz", max_token_limit=13 + ) + memory.save_context({"input": "bar"}, {"output": "foo"}) + memory.save_context({"input": "bar1"}, {"output": "foo1"}) + assert memory.buffer == ["Human: bar1\nAI: foo1"] + output = memory.load_memory_variables({}) + assert output == {"baz": "foo\nHuman: bar1\nAI: foo1"} diff --git a/tests/integration_tests/chains/test_pal.py b/tests/integration_tests/chains/test_pal.py new file mode 100644 index 0000000000000000000000000000000000000000..9bbf6f8d88622a18a72f82224b5d302117f4dda9 --- /dev/null +++ b/tests/integration_tests/chains/test_pal.py @@ -0,0 +1,31 @@ +"""Test PAL chain.""" + +from langchain import OpenAI +from langchain.chains.pal.base import PALChain + + +def test_math_prompt() -> None: + """Test math prompt.""" + llm = OpenAI(model_name="code-davinci-002", temperature=0, max_tokens=512) + pal_chain = PALChain.from_math_prompt(llm) + question = ( + "Jan has three times the number of pets as Marcia. " + "Marcia has two more pets than Cindy. " + "If Cindy has four pets, how many total pets do the three have?" + ) + output = pal_chain.run(question) + assert output == "28" + + +def test_colored_object_prompt() -> None: + """Test colored object prompt.""" + llm = OpenAI(model_name="code-davinci-002", temperature=0, max_tokens=512) + pal_chain = PALChain.from_colored_object_prompt(llm) + question = ( + "On the desk, you see two blue booklets, " + "two purple booklets, and two yellow pairs of sunglasses. " + "If I remove all the pairs of sunglasses from the desk, " + "how many purple items remain on it?" + ) + output = pal_chain.run(question) + assert output == "2" diff --git a/tests/integration_tests/chains/test_react.py b/tests/integration_tests/chains/test_react.py new file mode 100644 index 0000000000000000000000000000000000000000..76a93609f7b3b46ec0fbd11eba580fe0ffe1ad0c --- /dev/null +++ b/tests/integration_tests/chains/test_react.py @@ -0,0 +1,18 @@ +"""Integration test for the ReAct chain.""" + +from langchain.agents.react.base import ReActChain +from langchain.docstore.wikipedia import Wikipedia +from langchain.llms.openai import OpenAI + + +def test_react() -> None: + """Test functionality on a prompt.""" + llm = OpenAI(temperature=0, model_name="text-davinci-002") + react = ReActChain(llm=llm, docstore=Wikipedia()) + question = ( + "Author David Chanoff has collaborated with a U.S. 
Navy admiral " + "who served as the ambassador to the United Kingdom under " + "which President?" + ) + output = react.run(question) + assert output == "Bill Clinton" diff --git a/tests/integration_tests/chains/test_self_ask_with_search.py b/tests/integration_tests/chains/test_self_ask_with_search.py new file mode 100644 index 0000000000000000000000000000000000000000..61ef78d9228d23921be0a123a794f5cafac8ee86 --- /dev/null +++ b/tests/integration_tests/chains/test_self_ask_with_search.py @@ -0,0 +1,18 @@ +"""Integration test for self ask with search.""" +from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain +from langchain.llms.openai import OpenAI +from langchain.utilities.google_serper import GoogleSerperAPIWrapper + + +def test_self_ask_with_search() -> None: + """Test functionality on a prompt.""" + question = "What is the hometown of the reigning men's U.S. Open champion?" + chain = SelfAskWithSearchChain( + llm=OpenAI(temperature=0), + search_chain=GoogleSerperAPIWrapper(), + input_key="q", + output_key="a", + ) + answer = chain.run(question) + final_answer = answer.split("\n")[-1] + assert final_answer == "El Palmar, Spain" diff --git a/tests/integration_tests/chains/test_sql_database.py b/tests/integration_tests/chains/test_sql_database.py new file mode 100644 index 0000000000000000000000000000000000000000..3518866c2e6c8b73974fcc2227031a198afeee04 --- /dev/null +++ b/tests/integration_tests/chains/test_sql_database.py @@ -0,0 +1,94 @@ +"""Test SQL Database Chain.""" +from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert + +from langchain.chains.sql_database.base import ( + SQLDatabaseChain, + SQLDatabaseSequentialChain, +) +from langchain.llms.openai import OpenAI +from langchain.sql_database import SQLDatabase + +metadata_obj = MetaData() + +user = Table( + "user", + metadata_obj, + Column("user_id", Integer, primary_key=True), + Column("user_name", String(16), nullable=False), + Column("user_company", String(16), nullable=False), +) + + +def test_sql_database_run() -> None: + """Test that commands can be run successfully and returned in correct format.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + db_chain = SQLDatabaseChain(llm=OpenAI(temperature=0), database=db) + output = db_chain.run("What company does Harrison work at?") + expected_output = " Harrison works at Foo." + assert output == expected_output + + +def test_sql_database_run_update() -> None: + """Test that update commands run successfully and returned in correct format.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + db_chain = SQLDatabaseChain(llm=OpenAI(temperature=0), database=db) + output = db_chain.run("Update Harrison's workplace to Bar") + expected_output = " Harrison's workplace has been updated to Bar." + assert output == expected_output + output = db_chain.run("What company does Harrison work at?") + expected_output = " Harrison works at Bar." 
+ assert output == expected_output + + +def test_sql_database_sequential_chain_run() -> None: + """Test that commands can be run successfully SEQUENTIALLY + and returned in the correct format.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + db_chain = SQLDatabaseSequentialChain.from_llm( + llm=OpenAI(temperature=0), database=db + ) + output = db_chain.run("What company does Harrison work at?") + expected_output = " Harrison works at Foo." + assert output == expected_output + + +def test_sql_database_sequential_chain_intermediate_steps() -> None: + """Test that commands can be run successfully SEQUENTIALLY and returned + in the correct format, with intermediate steps.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + db_chain = SQLDatabaseSequentialChain.from_llm( + llm=OpenAI(temperature=0), database=db, return_intermediate_steps=True + ) + output = db_chain("What company does Harrison work at?") + expected_output = " Harrison works at Foo." + assert output["result"] == expected_output + + query = output["intermediate_steps"][0] + expected_query = ( + " SELECT user_company FROM user WHERE user_name = 'Harrison' LIMIT 1;" + ) + assert query == expected_query + + query_results = output["intermediate_steps"][1] + expected_query_results = "[('Foo',)]" + assert query_results == expected_query_results diff --git a/tests/integration_tests/chat_models/__init__.py b/tests/integration_tests/chat_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/integration_tests/chat_models/test_openai.py b/tests/integration_tests/chat_models/test_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..347c6a76c66f84200d2aecfe88ae1239991c75d9 --- /dev/null +++ b/tests/integration_tests/chat_models/test_openai.py @@ -0,0 +1,130 @@ +"""Test ChatOpenAI wrapper.""" + +import pytest + +from langchain.callbacks.base import CallbackManager +from langchain.chat_models.openai import ChatOpenAI +from langchain.schema import ( + BaseMessage, + ChatGeneration, + ChatResult, + HumanMessage, + LLMResult, + SystemMessage, +) +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +def test_chat_openai() -> None: + """Test ChatOpenAI wrapper.""" + chat = ChatOpenAI(max_tokens=10) + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_chat_openai_system_message() -> None: + """Test ChatOpenAI wrapper with system message.""" + chat = ChatOpenAI(max_tokens=10) + system_message = SystemMessage(content="You are to chat with the user.") + human_message = HumanMessage(content="Hello") + response = chat([system_message, human_message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_chat_openai_generate() -> None: + """Test ChatOpenAI wrapper with generate.""" + chat = ChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = chat.generate([[message], [message]]) + assert isinstance(response, LLMResult) + assert 
len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +def test_chat_openai_multiple_completions() -> None: + """Test ChatOpenAI wrapper with multiple completions.""" + chat = ChatOpenAI(max_tokens=10, n=5) + message = HumanMessage(content="Hello") + response = chat._generate([message]) + assert isinstance(response, ChatResult) + assert len(response.generations) == 5 + for generation in response.generations: + assert isinstance(generation.message, BaseMessage) + assert isinstance(generation.message.content, str) + + +def test_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = chat([message]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, BaseMessage) + + +def test_chat_openai_invalid_streaming_params() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + with pytest.raises(ValueError): + ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + n=5, + ) + + +@pytest.mark.asyncio +async def test_async_chat_openai() -> None: + """Test async generation.""" + chat = ChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +@pytest.mark.asyncio +async def test_async_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content diff --git a/tests/integration_tests/chat_models/test_promptlayer_openai.py b/tests/integration_tests/chat_models/test_promptlayer_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..c9962f75a38900a9803219ac26a356a8b61a410e --- /dev/null +++ b/tests/integration_tests/chat_models/test_promptlayer_openai.py @@ -0,0 +1,130 @@ +"""Test PromptLayerChatOpenAI wrapper.""" + +import pytest + +from langchain.callbacks.base import CallbackManager +from langchain.chat_models.promptlayer_openai import PromptLayerChatOpenAI +from langchain.schema import ( + BaseMessage, + ChatGeneration, + 
ChatResult, + HumanMessage, + LLMResult, + SystemMessage, +) +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +def test_promptlayer_chat_openai() -> None: + """Test PromptLayerChatOpenAI wrapper.""" + chat = PromptLayerChatOpenAI(max_tokens=10) + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_promptlayer_chat_openai_system_message() -> None: + """Test PromptLayerChatOpenAI wrapper with system message.""" + chat = PromptLayerChatOpenAI(max_tokens=10) + system_message = SystemMessage(content="You are to chat with the user.") + human_message = HumanMessage(content="Hello") + response = chat([system_message, human_message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_promptlayer_chat_openai_generate() -> None: + """Test PromptLayerChatOpenAI wrapper with generate.""" + chat = PromptLayerChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = chat.generate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +def test_promptlayer_chat_openai_multiple_completions() -> None: + """Test PromptLayerChatOpenAI wrapper with multiple completions.""" + chat = PromptLayerChatOpenAI(max_tokens=10, n=5) + message = HumanMessage(content="Hello") + response = chat._generate([message]) + assert isinstance(response, ChatResult) + assert len(response.generations) == 5 + for generation in response.generations: + assert isinstance(generation.message, BaseMessage) + assert isinstance(generation.message.content, str) + + +def test_promptlayer_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = PromptLayerChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = chat([message]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, BaseMessage) + + +def test_promptlayer_chat_openai_invalid_streaming_params() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + with pytest.raises(ValueError): + PromptLayerChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + n=5, + ) + + +@pytest.mark.asyncio +async def test_async_promptlayer_chat_openai() -> None: + """Test async generation.""" + chat = PromptLayerChatOpenAI(max_tokens=10, n=2) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 2 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +@pytest.mark.asyncio +async def test_async_promptlayer_chat_openai_streaming() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = 
FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = PromptLayerChatOpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Hello") + response = await chat.agenerate([[message], [message]]) + assert callback_handler.llm_streams > 0 + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content diff --git a/tests/integration_tests/document_loaders/__init__.py b/tests/integration_tests/document_loaders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f0ac2bd83f9886f31dfa0756d31349a95e0db1ec --- /dev/null +++ b/tests/integration_tests/document_loaders/__init__.py @@ -0,0 +1 @@ +"""Test document loader integrations.""" diff --git a/tests/integration_tests/document_loaders/test_bshtml.py b/tests/integration_tests/document_loaders/test_bshtml.py new file mode 100644 index 0000000000000000000000000000000000000000..7e67d6211bc8234348e5d49ced091e7057f12a5a --- /dev/null +++ b/tests/integration_tests/document_loaders/test_bshtml.py @@ -0,0 +1,17 @@ +from pathlib import Path + +from langchain.document_loaders.html_bs import BSHTMLLoader + + +def test_bs_html_loader() -> None: + """Test unstructured loader.""" + file_path = Path(__file__).parent.parent / "examples/example.html" + loader = BSHTMLLoader(str(file_path)) + docs = loader.load() + + assert len(docs) == 1 + + metadata = docs[0].metadata + + assert metadata["title"] == "Chew dad's slippers" + assert metadata["source"] == str(file_path) diff --git a/tests/integration_tests/document_loaders/test_figma.py b/tests/integration_tests/document_loaders/test_figma.py new file mode 100644 index 0000000000000000000000000000000000000000..00fa6488e261b1b6bdb17ab45e68bf47ad8f18b8 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_figma.py @@ -0,0 +1,13 @@ +from langchain.document_loaders.figma import FigmaFileLoader + +ACCESS_TOKEN = "" +IDS = "" +KEY = "" + + +def test_figma_file_loader() -> None: + """Test Figma file loader.""" + loader = FigmaFileLoader(ACCESS_TOKEN, IDS, KEY) + docs = loader.load() + + assert len(docs) == 1 diff --git a/tests/integration_tests/document_loaders/test_ifixit.py b/tests/integration_tests/document_loaders/test_ifixit.py new file mode 100644 index 0000000000000000000000000000000000000000..c97be49e1a1ce1a99d3058e2fd7211e40bb9e005 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_ifixit.py @@ -0,0 +1,37 @@ +from langchain.document_loaders.ifixit import IFixitLoader + + +def test_ifixit_loader() -> None: + """Test iFixit loader.""" + web_path = "https://www.ifixit.com/Guide/iPad+9+Battery+Replacement/151279" + loader = IFixitLoader(web_path) + assert loader.page_type == "Guide" + assert loader.id == "151279" + assert loader.web_path == web_path + + +def test_ifixit_loader_teardown() -> None: + web_path = "https://www.ifixit.com/Teardown/Banana+Teardown/811" + loader = IFixitLoader(web_path) + """ Teardowns are just guides by a different name """ + assert loader.page_type == "Guide" + assert loader.id == "811" + + +def test_ifixit_loader_device() -> None: + web_path = "https://www.ifixit.com/Device/Standard_iPad" + loader = IFixitLoader(web_path) + """ Teardowns are just 
guides by a different name """ + assert loader.page_type == "Device" + assert loader.id == "Standard_iPad" + + +def test_ifixit_loader_answers() -> None: + web_path = ( + "https://www.ifixit.com/Answers/View/318583/My+iPhone+6+is+typing+and+" + "opening+apps+by+itself" + ) + loader = IFixitLoader(web_path) + + assert loader.page_type == "Answers" + assert loader.id == "318583" diff --git a/tests/integration_tests/document_loaders/test_pdf.py b/tests/integration_tests/document_loaders/test_pdf.py new file mode 100644 index 0000000000000000000000000000000000000000..dc046b47eb55331968761c5548069a4609fd6424 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_pdf.py @@ -0,0 +1,55 @@ +from pathlib import Path + +from langchain.document_loaders import ( + PDFMinerLoader, + PyMuPDFLoader, + UnstructuredPDFLoader, +) + + +def test_unstructured_pdf_loader() -> None: + """Test unstructured loader.""" + file_path = Path(__file__).parent.parent / "examples/hello.pdf" + loader = UnstructuredPDFLoader(str(file_path)) + docs = loader.load() + + assert len(docs) == 1 + + +def test_pdfminer_loader() -> None: + """Test PDFMiner loader.""" + file_path = Path(__file__).parent.parent / "examples/hello.pdf" + loader = PDFMinerLoader(str(file_path)) + docs = loader.load() + + assert len(docs) == 1 + + file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf" + loader = PDFMinerLoader(str(file_path)) + + docs = loader.load() + assert len(docs) == 1 + + +def test_pymupdf_loader() -> None: + """Test PyMuPDF loader.""" + file_path = Path(__file__).parent.parent / "examples/hello.pdf" + loader = PyMuPDFLoader(str(file_path)) + + docs = loader.load() + assert len(docs) == 1 + + file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf" + loader = PyMuPDFLoader(str(file_path)) + + docs = loader.load() + assert len(docs) == 16 + assert loader.web_path is None + + web_path = "https://people.sc.fsu.edu/~jpeterson/hello_world.pdf" + loader = PyMuPDFLoader(web_path) + + docs = loader.load() + assert loader.web_path == web_path + assert loader.file_path != web_path + assert len(docs) == 1 diff --git a/tests/integration_tests/embeddings/__init__.py b/tests/integration_tests/embeddings/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f72c0f3ba6021cad2e989e390a4d036612413064 --- /dev/null +++ b/tests/integration_tests/embeddings/__init__.py @@ -0,0 +1 @@ +"""Test embedding integrations.""" diff --git a/tests/integration_tests/embeddings/test_cohere.py b/tests/integration_tests/embeddings/test_cohere.py new file mode 100644 index 0000000000000000000000000000000000000000..4e2aec50d23ec3f3b2e904434fdd7f9b812d5dad --- /dev/null +++ b/tests/integration_tests/embeddings/test_cohere.py @@ -0,0 +1,19 @@ +"""Test cohere embeddings.""" +from langchain.embeddings.cohere import CohereEmbeddings + + +def test_cohere_embedding_documents() -> None: + """Test cohere embeddings.""" + documents = ["foo bar"] + embedding = CohereEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 2048 + + +def test_cohere_embedding_query() -> None: + """Test cohere embeddings.""" + document = "foo bar" + embedding = CohereEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 2048 diff --git a/tests/integration_tests/embeddings/test_huggingface.py b/tests/integration_tests/embeddings/test_huggingface.py new file mode 100644 index 
0000000000000000000000000000000000000000..4c941580c6c775ed1f7366de676a86270e8efca2 --- /dev/null +++ b/tests/integration_tests/embeddings/test_huggingface.py @@ -0,0 +1,43 @@ +"""Test huggingface embeddings.""" +import unittest + +from langchain.embeddings.huggingface import ( + HuggingFaceEmbeddings, + HuggingFaceInstructEmbeddings, +) + + +@unittest.skip("This test causes a segfault.") +def test_huggingface_embedding_documents() -> None: + """Test huggingface embeddings.""" + documents = ["foo bar"] + embedding = HuggingFaceEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +@unittest.skip("This test causes a segfault.") +def test_huggingface_embedding_query() -> None: + """Test huggingface embeddings.""" + document = "foo bar" + embedding = HuggingFaceEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 768 + + +def test_huggingface_instructor_embedding_documents() -> None: + """Test huggingface embeddings.""" + documents = ["foo bar"] + embedding = HuggingFaceInstructEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_huggingface_instructor_embedding_query() -> None: + """Test huggingface embeddings.""" + query = "foo bar" + embedding = HuggingFaceInstructEmbeddings() + output = embedding.embed_query(query) + assert len(output) == 768 diff --git a/tests/integration_tests/embeddings/test_huggingface_hub.py b/tests/integration_tests/embeddings/test_huggingface_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..42dd55dbe6361fb3f2389bb14afff1a14a81cb61 --- /dev/null +++ b/tests/integration_tests/embeddings/test_huggingface_hub.py @@ -0,0 +1,28 @@ +"""Test HuggingFaceHub embeddings.""" +import pytest + +from langchain.embeddings import HuggingFaceHubEmbeddings + + +def test_huggingfacehub_embedding_documents() -> None: + """Test huggingfacehub embeddings.""" + documents = ["foo bar"] + embedding = HuggingFaceHubEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_huggingfacehub_embedding_query() -> None: + """Test huggingfacehub embeddings.""" + document = "foo bar" + embedding = HuggingFaceHubEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 768 + + +def test_huggingfacehub_embedding_invalid_repo() -> None: + """Test huggingfacehub embedding repo id validation.""" + # Only sentence-transformers models are currently supported. 
+ with pytest.raises(ValueError): + HuggingFaceHubEmbeddings(repo_id="allenai/specter") diff --git a/tests/integration_tests/embeddings/test_openai.py b/tests/integration_tests/embeddings/test_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..9aa7d19c7832a2e2b35f335c39e885d951e8e2fc --- /dev/null +++ b/tests/integration_tests/embeddings/test_openai.py @@ -0,0 +1,31 @@ +"""Test openai embeddings.""" +from langchain.embeddings.openai import OpenAIEmbeddings + + +def test_openai_embedding_documents() -> None: + """Test openai embeddings.""" + documents = ["foo bar"] + embedding = OpenAIEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 1536 + + +def test_openai_embedding_documents_multiple() -> None: + """Test openai embeddings.""" + documents = ["foo bar", "bar foo", "foo"] + embedding = OpenAIEmbeddings(chunk_size=2) + embedding.embedding_ctx_length = 8191 + output = embedding.embed_documents(documents) + assert len(output) == 3 + assert len(output[0]) == 1536 + assert len(output[1]) == 1536 + assert len(output[2]) == 1536 + + +def test_openai_embedding_query() -> None: + """Test openai embeddings.""" + document = "foo bar" + embedding = OpenAIEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 1536 diff --git a/tests/integration_tests/embeddings/test_self_hosted.py b/tests/integration_tests/embeddings/test_self_hosted.py new file mode 100644 index 0000000000000000000000000000000000000000..055f73433464c02d7042b063d7f6e32199c8743e --- /dev/null +++ b/tests/integration_tests/embeddings/test_self_hosted.py @@ -0,0 +1,96 @@ +"""Test self-hosted embeddings.""" +from typing import Any + +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + +from langchain.embeddings import ( + SelfHostedEmbeddings, + SelfHostedHuggingFaceEmbeddings, + SelfHostedHuggingFaceInstructEmbeddings, +) + + +def get_remote_instance() -> Any: + """Get remote instance for testing.""" + import runhouse as rh + + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1", use_spot=False) + gpu.install_packages(["pip:./"]) + return gpu + + +def test_self_hosted_huggingface_embedding_documents() -> None: + """Test self-hosted huggingface embeddings.""" + documents = ["foo bar"] + gpu = get_remote_instance() + embedding = SelfHostedHuggingFaceEmbeddings(hardware=gpu) + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_self_hosted_huggingface_embedding_query() -> None: + """Test self-hosted huggingface embeddings.""" + document = "foo bar" + gpu = get_remote_instance() + embedding = SelfHostedHuggingFaceEmbeddings(hardware=gpu) + output = embedding.embed_query(document) + assert len(output) == 768 + + +def test_self_hosted_huggingface_instructor_embedding_documents() -> None: + """Test self-hosted huggingface instruct embeddings.""" + documents = ["foo bar"] + gpu = get_remote_instance() + embedding = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu) + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_self_hosted_huggingface_instructor_embedding_query() -> None: + """Test self-hosted huggingface instruct embeddings.""" + query = "foo bar" + gpu = get_remote_instance() + embedding = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu) + output = embedding.embed_query(query) + assert len(output) == 768 + + +def get_pipeline() -> Any: + """Get pipeline for 
testing.""" + model_id = "facebook/bart-base" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + return pipeline("feature-extraction", model=model, tokenizer=tokenizer) + + +def inference_fn(pipeline: Any, prompt: str) -> Any: + """Inference function for testing.""" + # Return last hidden state of the model + if isinstance(prompt, list): + return [emb[0][-1] for emb in pipeline(prompt)] + return pipeline(prompt)[0][-1] + + +def test_self_hosted_embedding_documents() -> None: + """Test self-hosted huggingface instruct embeddings.""" + documents = ["foo bar"] * 2 + gpu = get_remote_instance() + embedding = SelfHostedEmbeddings( + model_load_fn=get_pipeline, hardware=gpu, inference_fn=inference_fn + ) + output = embedding.embed_documents(documents) + assert len(output) == 2 + assert len(output[0]) == 50265 + + +def test_self_hosted_embedding_query() -> None: + """Test self-hosted custom embeddings.""" + query = "foo bar" + gpu = get_remote_instance() + embedding = SelfHostedEmbeddings( + model_load_fn=get_pipeline, hardware=gpu, inference_fn=inference_fn + ) + output = embedding.embed_query(query) + assert len(output) == 50265 diff --git a/tests/integration_tests/embeddings/test_tensorflow_hub.py b/tests/integration_tests/embeddings/test_tensorflow_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..96bb007361f3c9e9ae7616510d2d7b9ce427d489 --- /dev/null +++ b/tests/integration_tests/embeddings/test_tensorflow_hub.py @@ -0,0 +1,19 @@ +"""Test TensorflowHub embeddings.""" +from langchain.embeddings import TensorflowHubEmbeddings + + +def test_tensorflowhub_embedding_documents() -> None: + """Test tensorflowhub embeddings.""" + documents = ["foo bar"] + embedding = TensorflowHubEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 512 + + +def test_tensorflowhub_embedding_query() -> None: + """Test tensorflowhub embeddings.""" + document = "foo bar" + embedding = TensorflowHubEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 512 diff --git a/tests/integration_tests/examples/example.html b/tests/integration_tests/examples/example.html new file mode 100644 index 0000000000000000000000000000000000000000..b9318b7a55850e70e83471f01032e46b02649097 --- /dev/null +++ b/tests/integration_tests/examples/example.html @@ -0,0 +1,25 @@ + + + Chew dad's slippers + + +

+ Instead of drinking water from the cat bowl, make sure to steal water from + the toilet
+ Chase the red dot
+ Munch, munch, chomp, chomp hate dogs. Spill litter box, scratch at owner, + destroy all furniture, especially couch get scared by sudden appearance of + cucumber cat is love, cat is life fat baby cat best buddy little guy for + catch eat throw up catch eat throw up bad birds jump on fridge. Purr like + a car engine oh yes, there is my human woman she does best pats ever that + all i like about her hiss meow .
+ Dead stare with ears cocked when owners are asleep, cry for no apparent + reason meow all night. Plop down in the middle where everybody walks favor + packaging over toy. Sit on the laptop kitty pounce, trip, faceplant.
+ + diff --git a/tests/integration_tests/llms/__init__.py b/tests/integration_tests/llms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad06b85ff81b6eb452072202f31018483911b8e --- /dev/null +++ b/tests/integration_tests/llms/__init__.py @@ -0,0 +1 @@ +"""All integration tests for LLM objects.""" diff --git a/tests/integration_tests/llms/test_ai21.py b/tests/integration_tests/llms/test_ai21.py new file mode 100644 index 0000000000000000000000000000000000000000..6e56e52694fdde2550f207b8a19af79552b7d9f9 --- /dev/null +++ b/tests/integration_tests/llms/test_ai21.py @@ -0,0 +1,28 @@ +"""Test AI21 API wrapper.""" + +from pathlib import Path + +from langchain.llms.ai21 import AI21 +from langchain.llms.loading import load_llm + + +def test_ai21_call() -> None: + """Test valid call to ai21.""" + llm = AI21(maxTokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_ai21_call_experimental() -> None: + """Test valid call to ai21 with an experimental model.""" + llm = AI21(maxTokens=10, model="j1-grande-instruct") + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an AI21 LLM.""" + llm = AI21(maxTokens=10) + llm.save(file_path=tmp_path / "ai21.yaml") + loaded_llm = load_llm(tmp_path / "ai21.yaml") + assert llm == loaded_llm diff --git a/tests/integration_tests/llms/test_aleph_alpha.py b/tests/integration_tests/llms/test_aleph_alpha.py new file mode 100644 index 0000000000000000000000000000000000000000..646b767667eb3e9096bacc481bf49f2d3bb3f434 --- /dev/null +++ b/tests/integration_tests/llms/test_aleph_alpha.py @@ -0,0 +1,10 @@ +"""Test Aleph Alpha API wrapper.""" + +from langchain.llms.aleph_alpha import AlephAlpha + + +def test_aleph_alpha_call() -> None: + """Test valid call to cohere.""" + llm = AlephAlpha(maximum_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_anthropic.py b/tests/integration_tests/llms/test_anthropic.py new file mode 100644 index 0000000000000000000000000000000000000000..9077633abbf89d5f30fdaa850fdab045995ee9a4 --- /dev/null +++ b/tests/integration_tests/llms/test_anthropic.py @@ -0,0 +1,23 @@ +"""Test Anthropic API wrapper.""" + +from typing import Generator + +from langchain.llms.anthropic import Anthropic + + +def test_anthropic_call() -> None: + """Test valid call to anthropic.""" + llm = Anthropic(model="bare-nano-0") + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_anthropic_streaming() -> None: + """Test streaming tokens from anthropic.""" + llm = Anthropic(model="bare-nano-0") + generator = llm.stream("I'm Pickle Rick") + + assert isinstance(generator, Generator) + + for token in generator: + assert isinstance(token["completion"], str) diff --git a/tests/integration_tests/llms/test_banana.py b/tests/integration_tests/llms/test_banana.py new file mode 100644 index 0000000000000000000000000000000000000000..03465e1ad393d375b7d96d427ffe89a94060ed98 --- /dev/null +++ b/tests/integration_tests/llms/test_banana.py @@ -0,0 +1,10 @@ +"""Test BananaDev API wrapper.""" + +from langchain.llms.bananadev import Banana + + +def test_banana_call() -> None: + """Test valid call to BananaDev.""" + llm = Banana() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_cerebrium.py b/tests/integration_tests/llms/test_cerebrium.py new file mode 100644 index 
0000000000000000000000000000000000000000..17e91323ccc05ae1da821bbab5a4da5342ee0b7d --- /dev/null +++ b/tests/integration_tests/llms/test_cerebrium.py @@ -0,0 +1,10 @@ +"""Test CerebriumAI API wrapper.""" + +from langchain.llms.cerebriumai import CerebriumAI + + +def test_cerebriumai_call() -> None: + """Test valid call to cerebriumai.""" + llm = CerebriumAI(max_length=10) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_cohere.py b/tests/integration_tests/llms/test_cohere.py new file mode 100644 index 0000000000000000000000000000000000000000..4c260982b70de49014ded54f74f448d616c875e1 --- /dev/null +++ b/tests/integration_tests/llms/test_cohere.py @@ -0,0 +1,22 @@ +"""Test Cohere API wrapper.""" + +from pathlib import Path + +from langchain.llms.cohere import Cohere +from langchain.llms.loading import load_llm +from tests.integration_tests.llms.utils import assert_llm_equality + + +def test_cohere_call() -> None: + """Test valid call to cohere.""" + llm = Cohere(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an Cohere LLM.""" + llm = Cohere(max_tokens=10) + llm.save(file_path=tmp_path / "cohere.yaml") + loaded_llm = load_llm(tmp_path / "cohere.yaml") + assert_llm_equality(llm, loaded_llm) diff --git a/tests/integration_tests/llms/test_forefrontai.py b/tests/integration_tests/llms/test_forefrontai.py new file mode 100644 index 0000000000000000000000000000000000000000..228ab1207bf5da83d7fba93886e5dfad77ee4795 --- /dev/null +++ b/tests/integration_tests/llms/test_forefrontai.py @@ -0,0 +1,10 @@ +"""Test ForefrontAI API wrapper.""" + +from langchain.llms.forefrontai import ForefrontAI + + +def test_forefrontai_call() -> None: + """Test valid call to forefrontai.""" + llm = ForefrontAI(length=10) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_gooseai.py b/tests/integration_tests/llms/test_gooseai.py new file mode 100644 index 0000000000000000000000000000000000000000..93d5d5cb1cefaaa38fce3ecb43c2d57c5627af3d --- /dev/null +++ b/tests/integration_tests/llms/test_gooseai.py @@ -0,0 +1,28 @@ +"""Test GooseAI API wrapper.""" + +from langchain.llms.gooseai import GooseAI + + +def test_gooseai_call() -> None: + """Test valid call to gooseai.""" + llm = GooseAI(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_gooseai_call_fairseq() -> None: + """Test valid call to gooseai with fairseq model.""" + llm = GooseAI(model_name="fairseq-1-3b", max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_gooseai_stop_valid() -> None: + """Test gooseai stop logic on valid configuration.""" + query = "write an ordered list of five items" + first_llm = GooseAI(stop="3", temperature=0) + first_output = first_llm(query) + second_llm = GooseAI(temperature=0) + second_output = second_llm(query, stop=["3"]) + # Because it stops on new lines, shouldn't return anything + assert first_output == second_output diff --git a/tests/integration_tests/llms/test_huggingface_endpoint.py b/tests/integration_tests/llms/test_huggingface_endpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..61639669d3b256843210eea149862d89afcdbbf2 --- /dev/null +++ b/tests/integration_tests/llms/test_huggingface_endpoint.py @@ -0,0 +1,50 @@ +"""Test HuggingFace API wrapper.""" + +import unittest +from pathlib import Path + +import 
pytest + +from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint +from langchain.llms.loading import load_llm +from tests.integration_tests.llms.utils import assert_llm_equality + + +@unittest.skip( + "This test requires an inference endpoint. Tested with Hugging Face endpoints" +) +def test_huggingface_endpoint_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFaceEndpoint( + endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} + ) + output = llm("Say foo:") + print(output) + assert isinstance(output, str) + + +@unittest.skip( + "This test requires an inference endpoint. Tested with Hugging Face endpoints" +) +def test_huggingface_endpoint_text2text_generation() -> None: + """Test valid call to HuggingFace text2text model.""" + llm = HuggingFaceEndpoint(endpoint_url="", task="text2text-generation") + output = llm("The capital of New York is") + assert output == "Albany" + + +def test_huggingface_endpoint_call_error() -> None: + """Test valid call to HuggingFace that errors.""" + llm = HuggingFaceEndpoint(model_kwargs={"max_new_tokens": -1}) + with pytest.raises(ValueError): + llm("Say foo:") + + +def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceHub LLM.""" + llm = HuggingFaceEndpoint( + endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} + ) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) diff --git a/tests/integration_tests/llms/test_huggingface_hub.py b/tests/integration_tests/llms/test_huggingface_hub.py new file mode 100644 index 0000000000000000000000000000000000000000..df0b441618f04150ca09eff8914caefd9b65e327 --- /dev/null +++ b/tests/integration_tests/llms/test_huggingface_hub.py @@ -0,0 +1,38 @@ +"""Test HuggingFace API wrapper.""" + +from pathlib import Path + +import pytest + +from langchain.llms.huggingface_hub import HuggingFaceHub +from langchain.llms.loading import load_llm +from tests.integration_tests.llms.utils import assert_llm_equality + + +def test_huggingface_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFaceHub(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_huggingface_text2text_generation() -> None: + """Test valid call to HuggingFace text2text model.""" + llm = HuggingFaceHub(repo_id="google/flan-t5-xl") + output = llm("The capital of New York is") + assert output == "Albany" + + +def test_huggingface_call_error() -> None: + """Test valid call to HuggingFace that errors.""" + llm = HuggingFaceHub(model_kwargs={"max_new_tokens": -1}) + with pytest.raises(ValueError): + llm("Say foo:") + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceHub LLM.""" + llm = HuggingFaceHub(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) diff --git a/tests/integration_tests/llms/test_huggingface_pipeline.py b/tests/integration_tests/llms/test_huggingface_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..b224a0a9ad3dd5b3a8addca0c41401e7d55405fd --- /dev/null +++ b/tests/integration_tests/llms/test_huggingface_pipeline.py @@ -0,0 +1,50 @@ +"""Test HuggingFace Pipeline wrapper.""" + +from pathlib import 
Path + +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + +from langchain.llms.huggingface_pipeline import HuggingFacePipeline +from langchain.llms.loading import load_llm +from tests.integration_tests.llms.utils import assert_llm_equality + + +def test_huggingface_pipeline_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFacePipeline.from_model_id( + model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10} + ) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_huggingface_pipeline_text2text_generation() -> None: + """Test valid call to HuggingFace text2text generation model.""" + llm = HuggingFacePipeline.from_model_id( + model_id="google/flan-t5-small", task="text2text-generation" + ) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceHub LLM.""" + llm = HuggingFacePipeline.from_model_id( + model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10} + ) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) + + +def test_init_with_pipeline() -> None: + """Test initialization with a HF pipeline.""" + model_id = "gpt2" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + pipe = pipeline( + "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10 + ) + llm = HuggingFacePipeline(pipeline=pipe) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_manifest.py b/tests/integration_tests/llms/test_manifest.py new file mode 100644 index 0000000000000000000000000000000000000000..eca4a94b0fa25392b4cfad99b20aac109e9caf6d --- /dev/null +++ b/tests/integration_tests/llms/test_manifest.py @@ -0,0 +1,12 @@ +"""Test manifest integration.""" +from langchain.llms.manifest import ManifestWrapper + + +def test_manifest_wrapper() -> None: + """Test manifest wrapper.""" + from manifest import Manifest + + manifest = Manifest(client_name="openai") + llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0}) + output = llm("The capital of New York is:") + assert output == "Albany" diff --git a/tests/integration_tests/llms/test_modal.py b/tests/integration_tests/llms/test_modal.py new file mode 100644 index 0000000000000000000000000000000000000000..495da20e4787c103958f3bbb952fdab2e41dc70d --- /dev/null +++ b/tests/integration_tests/llms/test_modal.py @@ -0,0 +1,10 @@ +"""Test Modal API wrapper.""" + +from langchain.llms.modal import Modal + + +def test_modal_call() -> None: + """Test valid call to Modal.""" + llm = Modal() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_nlpcloud.py b/tests/integration_tests/llms/test_nlpcloud.py new file mode 100644 index 0000000000000000000000000000000000000000..4c5ccca0d7d3727495eff46fa03920e68cff18cf --- /dev/null +++ b/tests/integration_tests/llms/test_nlpcloud.py @@ -0,0 +1,22 @@ +"""Test NLPCloud API wrapper.""" + +from pathlib import Path + +from langchain.llms.loading import load_llm +from langchain.llms.nlpcloud import NLPCloud +from tests.integration_tests.llms.utils import assert_llm_equality + + +def test_nlpcloud_call() -> None: + """Test valid call to nlpcloud.""" + llm = NLPCloud(max_length=10) + output = llm("Say foo:") + assert isinstance(output, str) + + 
+def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an NLPCloud LLM.""" + llm = NLPCloud(max_length=10) + llm.save(file_path=tmp_path / "nlpcloud.yaml") + loaded_llm = load_llm(tmp_path / "nlpcloud.yaml") + assert_llm_equality(llm, loaded_llm) diff --git a/tests/integration_tests/llms/test_openai.py b/tests/integration_tests/llms/test_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..1ada0ca6094221465ab68f7e2a7183595d7ead37 --- /dev/null +++ b/tests/integration_tests/llms/test_openai.py @@ -0,0 +1,213 @@ +"""Test OpenAI API wrapper.""" + +from pathlib import Path +from typing import Generator + +import pytest + +from langchain.callbacks.base import CallbackManager +from langchain.llms.loading import load_llm +from langchain.llms.openai import OpenAI, OpenAIChat +from langchain.schema import LLMResult +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +def test_openai_call() -> None: + """Test valid call to openai.""" + llm = OpenAI(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_openai_extra_kwargs() -> None: + """Test extra kwargs to openai.""" + # Check that foo is saved in extra_kwargs. + llm = OpenAI(foo=3, max_tokens=10) + assert llm.max_tokens == 10 + assert llm.model_kwargs == {"foo": 3} + + # Test that if extra_kwargs are provided, they are added to it. + llm = OpenAI(foo=3, model_kwargs={"bar": 2}) + assert llm.model_kwargs == {"foo": 3, "bar": 2} + + # Test that if provided twice it errors + with pytest.raises(ValueError): + OpenAI(foo=3, model_kwargs={"foo": 2}) + + +def test_openai_llm_output_contains_model_name() -> None: + """Test llm_output contains model_name.""" + llm = OpenAI(max_tokens=10) + llm_result = llm.generate(["Hello, how are you?"]) + assert llm_result.llm_output is not None + assert llm_result.llm_output["model_name"] == llm.model_name + + +def test_openai_stop_valid() -> None: + """Test openai stop logic on valid configuration.""" + query = "write an ordered list of five items" + first_llm = OpenAI(stop="3", temperature=0) + first_output = first_llm(query) + second_llm = OpenAI(temperature=0) + second_output = second_llm(query, stop=["3"]) + # Because it stops on new lines, shouldn't return anything + assert first_output == second_output + + +def test_openai_stop_error() -> None: + """Test openai stop logic on bad configuration.""" + llm = OpenAI(stop="3", temperature=0) + with pytest.raises(ValueError): + llm("write an ordered list of five items", stop=["\n"]) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an OpenAI LLM.""" + llm = OpenAI(max_tokens=10) + llm.save(file_path=tmp_path / "openai.yaml") + loaded_llm = load_llm(tmp_path / "openai.yaml") + assert loaded_llm == llm + + +def test_openai_streaming() -> None: + """Test streaming tokens from OpenAI.""" + llm = OpenAI(max_tokens=10) + generator = llm.stream("I'm Pickle Rick") + + assert isinstance(generator, Generator) + + for token in generator: + assert isinstance(token["choices"][0]["text"], str) + + +def test_openai_streaming_error() -> None: + """Test error handling in stream.""" + llm = OpenAI(best_of=2) + with pytest.raises(ValueError): + llm.stream("I'm Pickle Rick") + + +def test_openai_streaming_best_of_error() -> None: + """Test validation for streaming fails if best_of is not 1.""" + with pytest.raises(ValueError): + OpenAI(best_of=2, streaming=True) + + +def test_openai_streaming_n_error() -> None: + """Test validation for 
streaming fails if n is not 1.""" + with pytest.raises(ValueError): + OpenAI(n=2, streaming=True) + + +def test_openai_streaming_multiple_prompts_error() -> None: + """Test validation for streaming fails if multiple prompts are given.""" + with pytest.raises(ValueError): + OpenAI(streaming=True).generate(["I'm Pickle Rick", "I'm Pickle Rick"]) + + +def test_openai_streaming_call() -> None: + """Test valid call to openai.""" + llm = OpenAI(max_tokens=10, streaming=True) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_openai_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + llm = OpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + llm("Write me a sentence with 100 words.") + assert callback_handler.llm_streams == 10 + + +@pytest.mark.asyncio +async def test_openai_async_generate() -> None: + """Test async generation.""" + llm = OpenAI(max_tokens=10) + output = await llm.agenerate(["Hello, how are you?"]) + assert isinstance(output, LLMResult) + + +@pytest.mark.asyncio +async def test_openai_async_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + llm = OpenAI( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + result = await llm.agenerate(["Write me a sentence with 100 words."]) + assert callback_handler.llm_streams == 10 + assert isinstance(result, LLMResult) + + +def test_openai_chat_wrong_class() -> None: + """Test OpenAIChat with wrong class still works.""" + llm = OpenAI(model_name="gpt-3.5-turbo") + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_openai_chat() -> None: + """Test OpenAIChat.""" + llm = OpenAIChat(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_openai_chat_streaming() -> None: + """Test OpenAIChat with streaming option.""" + llm = OpenAIChat(max_tokens=10, streaming=True) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_openai_chat_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + llm = OpenAIChat( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + llm("Write me a sentence with 100 words.") + assert callback_handler.llm_streams != 0 + + +@pytest.mark.asyncio +async def test_openai_chat_async_generate() -> None: + """Test async chat.""" + llm = OpenAIChat(max_tokens=10) + output = await llm.agenerate(["Hello, how are you?"]) + assert isinstance(output, LLMResult) + + +@pytest.mark.asyncio +async def test_openai_chat_async_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + llm = OpenAIChat( + max_tokens=10, + streaming=True, + temperature=0, + callback_manager=callback_manager, + verbose=True, + ) + result = await llm.agenerate(["Write me a sentence with 100 words."]) + assert callback_handler.llm_streams != 0 + assert isinstance(result, LLMResult) diff --git 
a/tests/integration_tests/llms/test_petals.py b/tests/integration_tests/llms/test_petals.py new file mode 100644 index 0000000000000000000000000000000000000000..41fe53b27b9bbff7d8045793f9be9bb3c7755a0c --- /dev/null +++ b/tests/integration_tests/llms/test_petals.py @@ -0,0 +1,10 @@ +"""Test Petals API wrapper.""" + +from langchain.llms.petals import Petals + + +def test_petals_call() -> None: + """Test valid call to Petals.""" + llm = Petals(max_new_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_promptlayer_openai.py b/tests/integration_tests/llms/test_promptlayer_openai.py new file mode 100644 index 0000000000000000000000000000000000000000..b054e321028263f4eafb85fdb409a010696843fd --- /dev/null +++ b/tests/integration_tests/llms/test_promptlayer_openai.py @@ -0,0 +1,76 @@ +"""Test PromptLayer OpenAI API wrapper.""" + +from pathlib import Path +from typing import Generator + +import pytest + +from langchain.llms.loading import load_llm +from langchain.llms.promptlayer_openai import PromptLayerOpenAI + + +def test_promptlayer_openai_call() -> None: + """Test valid call to promptlayer openai.""" + llm = PromptLayerOpenAI(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_promptlayer_openai_extra_kwargs() -> None: + """Test extra kwargs to promptlayer openai.""" + # Check that foo is saved in extra_kwargs. + llm = PromptLayerOpenAI(foo=3, max_tokens=10) + assert llm.max_tokens == 10 + assert llm.model_kwargs == {"foo": 3} + + # Test that if extra_kwargs are provided, they are added to it. + llm = PromptLayerOpenAI(foo=3, model_kwargs={"bar": 2}) + assert llm.model_kwargs == {"foo": 3, "bar": 2} + + # Test that if provided twice it errors + with pytest.raises(ValueError): + PromptLayerOpenAI(foo=3, model_kwargs={"foo": 2}) + + +def test_promptlayer_openai_stop_valid() -> None: + """Test promptlayer openai stop logic on valid configuration.""" + query = "write an ordered list of five items" + first_llm = PromptLayerOpenAI(stop="3", temperature=0) + first_output = first_llm(query) + second_llm = PromptLayerOpenAI(temperature=0) + second_output = second_llm(query, stop=["3"]) + # Because it stops on new lines, shouldn't return anything + assert first_output == second_output + + +def test_promptlayer_openai_stop_error() -> None: + """Test promptlayer openai stop logic on bad configuration.""" + llm = PromptLayerOpenAI(stop="3", temperature=0) + with pytest.raises(ValueError): + llm("write an ordered list of five items", stop=["\n"]) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading a PromptLayer OpenAI LLM.""" + llm = PromptLayerOpenAI(max_tokens=10) + llm.save(file_path=tmp_path / "openai.yaml") + loaded_llm = load_llm(tmp_path / "openai.yaml") + assert loaded_llm == llm + + +def test_promptlayer_openai_streaming() -> None: + """Test streaming tokens from PromptLayer OpenAI.""" + llm = PromptLayerOpenAI(max_tokens=10) + generator = llm.stream("I'm Pickle Rick") + + assert isinstance(generator, Generator) + + for token in generator: + assert isinstance(token["choices"][0]["text"], str) + + +def test_promptlayer_openai_streaming_error() -> None: + """Test error handling in stream.""" + llm = PromptLayerOpenAI(best_of=2) + with pytest.raises(ValueError): + llm.stream("I'm Pickle Rick") diff --git a/tests/integration_tests/llms/test_propmptlayer_openai_chat.py b/tests/integration_tests/llms/test_propmptlayer_openai_chat.py new file mode 100644 index 
0000000000000000000000000000000000000000..35e0cce4737a3d0168da84a32fb2aa3857e95493 --- /dev/null +++ b/tests/integration_tests/llms/test_propmptlayer_openai_chat.py @@ -0,0 +1,41 @@ +"""Test PromptLayer OpenAIChat API wrapper.""" + +from pathlib import Path + +import pytest + +from langchain.llms.loading import load_llm +from langchain.llms.promptlayer_openai import PromptLayerOpenAIChat + + +def test_promptlayer_openai_chat_call() -> None: + """Test valid call to PromptLayer OpenAIChat.""" + llm = PromptLayerOpenAIChat(max_tokens=10) + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_promptlayer_openai_chat_stop_valid() -> None: + """Test PromptLayer OpenAIChat stop logic on valid configuration.""" + query = "write an ordered list of five items" + first_llm = PromptLayerOpenAIChat(stop="3", temperature=0) + first_output = first_llm(query) + second_llm = PromptLayerOpenAIChat(temperature=0) + second_output = second_llm(query, stop=["3"]) + # Both stop at "3", so the outputs should match. + assert first_output == second_output + + +def test_promptlayer_openai_chat_stop_error() -> None: + """Test PromptLayer OpenAIChat stop logic on bad configuration.""" + llm = PromptLayerOpenAIChat(stop="3", temperature=0) + with pytest.raises(ValueError): + llm("write an ordered list of five items", stop=["\n"]) + + +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading a PromptLayer OpenAIChat LLM.""" + llm = PromptLayerOpenAIChat(max_tokens=10) + llm.save(file_path=tmp_path / "openai.yaml") + loaded_llm = load_llm(tmp_path / "openai.yaml") + assert loaded_llm == llm diff --git a/tests/integration_tests/llms/test_self_hosted_llm.py b/tests/integration_tests/llms/test_self_hosted_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..0dc753abf15f4df094dbe69ac6b46d1103d848b0 --- /dev/null +++ b/tests/integration_tests/llms/test_self_hosted_llm.py @@ -0,0 +1,105 @@ +"""Test Self-hosted LLMs.""" +import pickle +from typing import Any, List, Optional + +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + +from langchain.llms import SelfHostedHuggingFaceLLM, SelfHostedPipeline + +model_reqs = ["pip:./", "transformers", "torch"] + + +def get_remote_instance() -> Any: + """Get remote instance for testing.""" + import runhouse as rh + + return rh.cluster(name="rh-a10x", instance_type="A100:1", use_spot=False) + + +def test_self_hosted_huggingface_pipeline_text_generation() -> None: + """Test valid call to self-hosted HuggingFace text generation model.""" + gpu = get_remote_instance() + llm = SelfHostedHuggingFaceLLM( + model_id="gpt2", + task="text-generation", + model_kwargs={"n_positions": 1024}, + hardware=gpu, + model_reqs=model_reqs, + ) + output = llm("Say foo:") # type: ignore + assert isinstance(output, str) + + +def test_self_hosted_huggingface_pipeline_text2text_generation() -> None: + """Test valid call to self-hosted HuggingFace text2text generation model.""" + gpu = get_remote_instance() + llm = SelfHostedHuggingFaceLLM( + model_id="google/flan-t5-small", + task="text2text-generation", + hardware=gpu, + model_reqs=model_reqs, + ) + output = llm("Say foo:") # type: ignore + assert isinstance(output, str) + + +def load_pipeline() -> Any: + """Load pipeline for testing.""" + model_id = "gpt2" + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id) + pipe = pipeline( + "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10 + ) + return pipe + + +def 
inference_fn(pipeline: Any, prompt: str, stop: Optional[List[str]] = None) -> str: + """Inference function for testing.""" + return pipeline(prompt)[0]["generated_text"] + + +def test_init_with_local_pipeline() -> None: + """Test initialization with a self-hosted HF pipeline.""" + gpu = get_remote_instance() + pipeline = load_pipeline() + llm = SelfHostedPipeline.from_pipeline( + pipeline=pipeline, + hardware=gpu, + model_reqs=model_reqs, + inference_fn=inference_fn, + ) + output = llm("Say foo:") # type: ignore + assert isinstance(output, str) + + +def test_init_with_pipeline_path() -> None: + """Test initialization with a self-hosted HF pipeline.""" + gpu = get_remote_instance() + pipeline = load_pipeline() + import runhouse as rh + + rh.blob(pickle.dumps(pipeline), path="models/pipeline.pkl").save().to( + gpu, path="models" + ) + llm = SelfHostedPipeline.from_pipeline( + pipeline="models/pipeline.pkl", + hardware=gpu, + model_reqs=model_reqs, + inference_fn=inference_fn, + ) + output = llm("Say foo:") # type: ignore + assert isinstance(output, str) + + +def test_init_with_pipeline_fn() -> None: + """Test initialization with a self-hosted HF pipeline.""" + gpu = get_remote_instance() + llm = SelfHostedPipeline( + model_load_fn=load_pipeline, + hardware=gpu, + model_reqs=model_reqs, + inference_fn=inference_fn, + ) + output = llm("Say foo:") # type: ignore + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_stochasticai.py b/tests/integration_tests/llms/test_stochasticai.py new file mode 100644 index 0000000000000000000000000000000000000000..8ab45d98a0585445834f9cb40216bbedc2fd0982 --- /dev/null +++ b/tests/integration_tests/llms/test_stochasticai.py @@ -0,0 +1,10 @@ +"""Test StochasticAI API wrapper.""" + +from langchain.llms.stochasticai import StochasticAI + + +def test_stochasticai_call() -> None: + """Test valid call to StochasticAI.""" + llm = StochasticAI() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/test_writer.py b/tests/integration_tests/llms/test_writer.py new file mode 100644 index 0000000000000000000000000000000000000000..672efc613c8338fda2a2c99ece721b36d210e620 --- /dev/null +++ b/tests/integration_tests/llms/test_writer.py @@ -0,0 +1,10 @@ +"""Test Writer API wrapper.""" + +from langchain.llms.writer import Writer + + +def test_writer_call() -> None: + """Test valid call to Writer.""" + llm = Writer() + output = llm("Say foo:") + assert isinstance(output, str) diff --git a/tests/integration_tests/llms/utils.py b/tests/integration_tests/llms/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..31a27d887a394194bfa5e5cb6458f63a339aeccc --- /dev/null +++ b/tests/integration_tests/llms/utils.py @@ -0,0 +1,16 @@ +"""Utils for LLM Tests.""" + +from langchain.llms.base import BaseLLM + + +def assert_llm_equality(llm: BaseLLM, loaded_llm: BaseLLM) -> None: + """Assert LLM Equality for tests.""" + # Check that they are the same type. 
+ assert type(llm) == type(loaded_llm) + # Client field can be session based, so hash is different despite + # all other values being the same, so just assess all other fields + for field in llm.__fields__.keys(): + if field != "client" and field != "pipeline": + val = getattr(llm, field) + new_val = getattr(loaded_llm, field) + assert new_val == val diff --git a/tests/integration_tests/test_googlesearch_api.py b/tests/integration_tests/test_googlesearch_api.py new file mode 100644 index 0000000000000000000000000000000000000000..3693e212b33956cb113de0ac67f9665efb5b64f0 --- /dev/null +++ b/tests/integration_tests/test_googlesearch_api.py @@ -0,0 +1,19 @@ +"""Integration test for Google Search API Wrapper.""" +from langchain.utilities.google_search import GoogleSearchAPIWrapper + + +def test_call() -> None: + """Test that call gives the correct answer.""" + search = GoogleSearchAPIWrapper() + output = search.run("What was Obama's first name?") + assert "Barack Hussein Obama II" in output + + +def test_no_result_call() -> None: + """Test that call gives no result.""" + search = GoogleSearchAPIWrapper() + output = search.run( + "NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL_NORESULTCALL" + ) + print(type(output)) + assert "No good Google Search Result was found" == output diff --git a/tests/integration_tests/test_googleserper_api.py b/tests/integration_tests/test_googleserper_api.py new file mode 100644 index 0000000000000000000000000000000000000000..67baae4b22985eff1dfee6fb223079c1d1609ca9 --- /dev/null +++ b/tests/integration_tests/test_googleserper_api.py @@ -0,0 +1,9 @@ +"""Integration test for Serper.dev's Google Search API Wrapper.""" +from langchain.utilities.google_serper import GoogleSerperAPIWrapper + + +def test_call() -> None: + """Test that call gives the correct answer.""" + search = GoogleSerperAPIWrapper() + output = search.run("What was Obama's first name?") + assert "Barack Hussein Obama II" in output diff --git a/tests/integration_tests/test_ngram_overlap_example_selector.py b/tests/integration_tests/test_ngram_overlap_example_selector.py new file mode 100644 index 0000000000000000000000000000000000000000..5c7bd4b140ae4863059c1a5b4945096bb1b40785 --- /dev/null +++ b/tests/integration_tests/test_ngram_overlap_example_selector.py @@ -0,0 +1,73 @@ +"""Test functionality related to ngram overlap based selector.""" + +import pytest + +from langchain.prompts.example_selector.ngram_overlap import ( + NGramOverlapExampleSelector, + ngram_overlap_score, +) +from langchain.prompts.prompt import PromptTemplate + +EXAMPLES = [ + {"input": "See Spot run.", "output": "foo1"}, + {"input": "My dog barks.", "output": "foo2"}, + {"input": "Spot can run.", "output": "foo3"}, +] + + +@pytest.fixture +def selector() -> NGramOverlapExampleSelector: + """Get ngram overlap based selector to use in tests.""" + prompts = PromptTemplate( + input_variables=["input", "output"], template="Input: {input}\nOutput: {output}" + ) + selector = NGramOverlapExampleSelector( + examples=EXAMPLES, + example_prompt=prompts, + ) + return selector + + +def test_selector_valid(selector: NGramOverlapExampleSelector) -> None: + """Test NGramOverlapExampleSelector can select examples.""" + sentence = "Spot can run." 
+ output = selector.select_examples({"input": sentence}) + assert output == [EXAMPLES[2], EXAMPLES[0], EXAMPLES[1]] + + +def test_selector_add_example(selector: NGramOverlapExampleSelector) -> None: + """Test NGramOverlapExampleSelector can add an example.""" + new_example = {"input": "Spot plays fetch.", "output": "foo4"} + selector.add_example(new_example) + sentence = "Spot can run." + output = selector.select_examples({"input": sentence}) + assert output == [EXAMPLES[2], EXAMPLES[0]] + [new_example] + [EXAMPLES[1]] + + +def test_selector_threshold_zero(selector: NGramOverlapExampleSelector) -> None: + """Tests NGramOverlapExampleSelector threshold set to 0.0.""" + selector.threshold = 0.0 + sentence = "Spot can run." + output = selector.select_examples({"input": sentence}) + assert output == [EXAMPLES[2], EXAMPLES[0]] + + +def test_selector_threshold_more_than_one( + selector: NGramOverlapExampleSelector, +) -> None: + """Tests NGramOverlapExampleSelector threshold greater than 1.0.""" + selector.threshold = 1.0 + 1e-9 + sentence = "Spot can run." + output = selector.select_examples({"input": sentence}) + assert output == [] + + +def test_ngram_overlap_score(selector: NGramOverlapExampleSelector) -> None: + """Tests that ngram_overlap_score returns correct values.""" + selector.threshold = 1.0 + 1e-9 + none = ngram_overlap_score(["Spot can run."], ["My dog barks."]) + some = ngram_overlap_score(["Spot can run."], ["See Spot run."]) + complete = ngram_overlap_score(["Spot can run."], ["Spot can run."]) + + check = [abs(none - 0.0) < 1e-9, 0.0 < some < 1.0, abs(complete - 1.0) < 1e-9] + assert check == [True, True, True] diff --git a/tests/integration_tests/test_nlp_text_splitters.py b/tests/integration_tests/test_nlp_text_splitters.py new file mode 100644 index 0000000000000000000000000000000000000000..4837fe20ad8000ff00cbc506df0961a9471d3bc5 --- /dev/null +++ b/tests/integration_tests/test_nlp_text_splitters.py @@ -0,0 +1,36 @@ +"""Test text splitting functionality using NLTK and Spacy based sentence splitters.""" +import pytest + +from langchain.text_splitter import NLTKTextSplitter, SpacyTextSplitter + + +def test_nltk_text_splitting_args() -> None: + """Test invalid arguments.""" + with pytest.raises(ValueError): + NLTKTextSplitter(chunk_size=2, chunk_overlap=4) + + +def test_spacy_text_splitting_args() -> None: + """Test invalid arguments.""" + with pytest.raises(ValueError): + SpacyTextSplitter(chunk_size=2, chunk_overlap=4) + + +def test_nltk_text_splitter() -> None: + """Test splitting by sentence using NLTK.""" + text = "This is sentence one. And this is sentence two." + separator = "|||" + splitter = NLTKTextSplitter(separator=separator) + output = splitter.split_text(text) + expected_output = [f"This is sentence one.{separator}And this is sentence two."] + assert output == expected_output + + +def test_spacy_text_splitter() -> None: + """Test splitting by sentence using Spacy.""" + text = "This is sentence one. And this is sentence two." 
+ separator = "|||" + splitter = SpacyTextSplitter(separator=separator) + output = splitter.split_text(text) + expected_output = [f"This is sentence one.{separator}And this is sentence two."] + assert output == expected_output diff --git a/tests/integration_tests/test_pdf_pagesplitter.py b/tests/integration_tests/test_pdf_pagesplitter.py new file mode 100644 index 0000000000000000000000000000000000000000..f86c2f166a7c312b46c3b9a9509cba9f486a43fb --- /dev/null +++ b/tests/integration_tests/test_pdf_pagesplitter.py @@ -0,0 +1,19 @@ +"""Test splitting with page numbers included.""" +import os + +from langchain.document_loaders import PyPDFLoader +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.vectorstores import FAISS + + +def test_pdf_pagesplitter() -> None: + """Test splitting with page numbers included.""" + script_dir = os.path.dirname(__file__) + loader = PyPDFLoader(os.path.join(script_dir, "examples/hello.pdf")) + docs = loader.load() + assert "page" in docs[0].metadata + assert "source" in docs[0].metadata + + faiss_index = FAISS.from_documents(docs, OpenAIEmbeddings()) + docs = faiss_index.similarity_search("Complete this sentence: Hello", k=1) + assert "Hello world" in docs[0].page_content diff --git a/tests/integration_tests/test_serpapi.py b/tests/integration_tests/test_serpapi.py new file mode 100644 index 0000000000000000000000000000000000000000..2e3d342716d40aded4f29fb0704fe4b287c2af04 --- /dev/null +++ b/tests/integration_tests/test_serpapi.py @@ -0,0 +1,9 @@ +"""Integration test for SerpAPI.""" +from langchain.utilities import SerpAPIWrapper + + +def test_call() -> None: + """Test that call gives the correct answer.""" + chain = SerpAPIWrapper() + output = chain.run("What was Obama's first name?") + assert output == "Barack Hussein Obama II" diff --git a/tests/integration_tests/test_text_splitter.py b/tests/integration_tests/test_text_splitter.py new file mode 100644 index 0000000000000000000000000000000000000000..367899aa9eff5dd7c602d7f882e7c7f188df3a4e --- /dev/null +++ b/tests/integration_tests/test_text_splitter.py @@ -0,0 +1,41 @@ +"""Test text splitters that require an integration.""" + +import pytest + +from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter + + +def test_huggingface_type_check() -> None: + """Test that type checks are done properly on input.""" + with pytest.raises(ValueError): + CharacterTextSplitter.from_huggingface_tokenizer("foo") + + +def test_huggingface_tokenizer() -> None: + """Test text splitter that uses a HuggingFace tokenizer.""" + from transformers import GPT2TokenizerFast + + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") + text_splitter = CharacterTextSplitter.from_huggingface_tokenizer( + tokenizer, separator=" ", chunk_size=1, chunk_overlap=0 + ) + output = text_splitter.split_text("foo bar") + assert output == ["foo", "bar"] + + +class TestTokenTextSplitter: + """Test token text splitter.""" + + def test_basic(self) -> None: + """Test no overlap.""" + splitter = TokenTextSplitter(chunk_size=5, chunk_overlap=0) + output = splitter.split_text("abcdef" * 5) # 10 token string + expected_output = ["abcdefabcdefabc", "defabcdefabcdef"] + assert output == expected_output + + def test_overlap(self) -> None: + """Test with overlap.""" + splitter = TokenTextSplitter(chunk_size=5, chunk_overlap=1) + output = splitter.split_text("abcdef" * 5) # 10 token string + expected_output = ["abcdefabcdefabc", "abcdefabcdefabc", "abcdef"] + assert output == expected_output diff --git 
a/tests/integration_tests/test_wolfram_alpha_api.py b/tests/integration_tests/test_wolfram_alpha_api.py new file mode 100644 index 0000000000000000000000000000000000000000..223ec0a3d56cbb11635b2304a55b8273480c9a0a --- /dev/null +++ b/tests/integration_tests/test_wolfram_alpha_api.py @@ -0,0 +1,9 @@ +"""Integration test for Wolfram Alpha API Wrapper.""" +from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper + + +def test_call() -> None: + """Test that call gives the correct answer.""" + search = WolframAlphaAPIWrapper() + output = search.run("what is 2x+18=x+5?") + assert "x = -13" in output diff --git a/tests/integration_tests/vectorstores/__init__.py b/tests/integration_tests/vectorstores/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4396bb230490ab8f5aa03cc39b82e7f4d2819ce8 --- /dev/null +++ b/tests/integration_tests/vectorstores/__init__.py @@ -0,0 +1 @@ +"""Test vectorstores.""" diff --git a/tests/integration_tests/vectorstores/fake_embeddings.py b/tests/integration_tests/vectorstores/fake_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..17a81e0493c0e98a4791fc50acf3662cf0f2a0aa --- /dev/null +++ b/tests/integration_tests/vectorstores/fake_embeddings.py @@ -0,0 +1,22 @@ +"""Fake Embedding class for testing purposes.""" +from typing import List + +from langchain.embeddings.base import Embeddings + +fake_texts = ["foo", "bar", "baz"] + + +class FakeEmbeddings(Embeddings): + """Fake embeddings functionality for testing.""" + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return simple embeddings. + Embeddings encode each text as its index.""" + return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))] + + def embed_query(self, text: str) -> List[float]: + """Return constant query embeddings. + Embeddings are identical to embed_documents(texts)[0]. 
+ Distance to each text will be that text's index, + as it was passed to embed_documents.""" + return [float(1.0)] * 9 + [float(0.0)] diff --git a/tests/integration_tests/vectorstores/test_atlas.py b/tests/integration_tests/vectorstores/test_atlas.py new file mode 100644 index 0000000000000000000000000000000000000000..2a7c99e8181fff0791131648c4a07558f631b98a --- /dev/null +++ b/tests/integration_tests/vectorstores/test_atlas.py @@ -0,0 +1,40 @@ +"""Test Atlas functionality.""" +import time + +from langchain.vectorstores import AtlasDB +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + +ATLAS_TEST_API_KEY = "7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6" + + +def test_atlas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = AtlasDB.from_texts( + name="langchain_test_project" + str(time.time()), + texts=texts, + api_key=ATLAS_TEST_API_KEY, + embedding=FakeEmbeddings(), + ) + output = docsearch.similarity_search("foo", k=1) + assert len(output) == 1 + assert output[0].page_content == "foo" + + +def test_atlas_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = AtlasDB.from_texts( + name="langchain_test_project" + str(time.time()), + texts=texts, + api_key=ATLAS_TEST_API_KEY, + embedding=FakeEmbeddings(), + metadatas=metadatas, + reset_project_if_exists=True, + ) + + output = docsearch.similarity_search("foo", k=1) + assert len(output) == 1 + assert output[0].page_content == "foo" + assert output[0].metadata["page"] == "0" diff --git a/tests/integration_tests/vectorstores/test_chroma.py b/tests/integration_tests/vectorstores/test_chroma.py new file mode 100644 index 0000000000000000000000000000000000000000..9cb07599d45e84876281b1305a4c5313151b3591 --- /dev/null +++ b/tests/integration_tests/vectorstores/test_chroma.py @@ -0,0 +1,115 @@ +"""Test Chroma functionality.""" +from langchain.docstore.document import Document +from langchain.vectorstores import Chroma +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +def test_chroma() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = Chroma.from_texts( + collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_chroma_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = Chroma.from_texts( + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), + metadatas=metadatas, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": "0"})] + + +def test_chroma_with_metadatas_with_scores() -> None: + """Test end to end construction and scored search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = Chroma.from_texts( + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), + metadatas=metadatas, + ) + output = docsearch.similarity_search_with_score("foo", k=1) + assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)] + + +def test_chroma_search_filter() -> None: + """Test end to end construction and search 
with metadata filtering.""" + texts = ["far", "bar", "baz"] + metadatas = [{"first_letter": "{}".format(text[0])} for text in texts] + docsearch = Chroma.from_texts( + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), + metadatas=metadatas, + ) + output = docsearch.similarity_search("far", k=1, filter={"first_letter": "f"}) + assert output == [Document(page_content="far", metadata={"first_letter": "f"})] + output = docsearch.similarity_search("far", k=1, filter={"first_letter": "b"}) + assert output == [Document(page_content="bar", metadata={"first_letter": "b"})] + + +def test_chroma_search_filter_with_scores() -> None: + """Test end to end construction and scored search with metadata filtering.""" + texts = ["far", "bar", "baz"] + metadatas = [{"first_letter": "{}".format(text[0])} for text in texts] + docsearch = Chroma.from_texts( + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), + metadatas=metadatas, + ) + output = docsearch.similarity_search_with_score( + "far", k=1, filter={"first_letter": "f"} + ) + assert output == [ + (Document(page_content="far", metadata={"first_letter": "f"}), 0.0) + ] + output = docsearch.similarity_search_with_score( + "far", k=1, filter={"first_letter": "b"} + ) + assert output == [ + (Document(page_content="bar", metadata={"first_letter": "b"}), 1.0) + ] + + +def test_chroma_with_persistence() -> None: + """Test end to end construction and search, with persistence.""" + chroma_persist_dir = "./tests/persist_dir" + collection_name = "test_collection" + texts = ["foo", "bar", "baz"] + docsearch = Chroma.from_texts( + collection_name=collection_name, + texts=texts, + embedding=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ) + + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + docsearch.persist() + + # Get a new VectorStore from the persisted directory + docsearch = Chroma( + collection_name=collection_name, + embedding_function=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ) + output = docsearch.similarity_search("foo", k=1) + + # Clean up + docsearch.delete_collection() + + # Persist doesn't need to be called again + # Data will be automatically persisted on object deletion + # Or on program exit diff --git a/tests/integration_tests/vectorstores/test_deeplake.py b/tests/integration_tests/vectorstores/test_deeplake.py new file mode 100644 index 0000000000000000000000000000000000000000..a8316f2167f9dc5a86efb76929b611f9a85eba59 --- /dev/null +++ b/tests/integration_tests/vectorstores/test_deeplake.py @@ -0,0 +1,58 @@ +"""Test Deep Lake functionality.""" +from langchain.docstore.document import Document +from langchain.vectorstores import DeepLake +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +def test_deeplake() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = DeepLake.from_texts( + dataset_path="mem://test_path", texts=texts, embedding=FakeEmbeddings() + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_deeplake_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = DeepLake.from_texts( + dataset_path="mem://test_path", + texts=texts, + embedding=FakeEmbeddings(), + metadatas=metadatas, + ) + output = docsearch.similarity_search("foo", 
k=1) + assert output == [Document(page_content="foo", metadata={"page": "0"})] + + +def test_deeplake_with_persistence() -> None: + """Test end to end construction and search, with persistence.""" + dataset_path = "./tests/persist_dir" + texts = ["foo", "bar", "baz"] + docsearch = DeepLake.from_texts( + dataset_path=dataset_path, + texts=texts, + embedding=FakeEmbeddings(), + ) + + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + docsearch.persist() + + # Get a new VectorStore from the persisted directory + docsearch = DeepLake( + dataset_path=dataset_path, + embedding_function=FakeEmbeddings(), + ) + output = docsearch.similarity_search("foo", k=1) + + # Clean up + docsearch.delete_dataset() + + # Persist doesn't need to be called again + # Data will be automatically persisted on object deletion + # Or on program exit diff --git a/tests/integration_tests/vectorstores/test_elasticsearch.py b/tests/integration_tests/vectorstores/test_elasticsearch.py new file mode 100644 index 0000000000000000000000000000000000000000..075fab4adae6f2ce90d5aa34839a5b356c3d1bc0 --- /dev/null +++ b/tests/integration_tests/vectorstores/test_elasticsearch.py @@ -0,0 +1,29 @@ +"""Test ElasticSearch functionality.""" + +from langchain.docstore.document import Document +from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +def test_elasticsearch() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = ElasticVectorSearch.from_texts( + texts, FakeEmbeddings(), elasticsearch_url="http://localhost:9200" + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_elasticsearch_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = ElasticVectorSearch.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + elasticsearch_url="http://localhost:9200", + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": 0})] diff --git a/tests/integration_tests/vectorstores/test_faiss.py b/tests/integration_tests/vectorstores/test_faiss.py new file mode 100644 index 0000000000000000000000000000000000000000..d1fc9c5e886de9f2683cc169bc5c54fe46feadd6 --- /dev/null +++ b/tests/integration_tests/vectorstores/test_faiss.py @@ -0,0 +1,111 @@ +"""Test FAISS functionality.""" +import tempfile + +import pytest + +from langchain.docstore.document import Document +from langchain.docstore.in_memory import InMemoryDocstore +from langchain.docstore.wikipedia import Wikipedia +from langchain.vectorstores.faiss import FAISS +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +def test_faiss() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = FAISS.from_texts(texts, FakeEmbeddings()) + index_to_id = docsearch.index_to_docstore_id + expected_docstore = InMemoryDocstore( + { + index_to_id[0]: Document(page_content="foo"), + index_to_id[1]: Document(page_content="bar"), + index_to_id[2]: Document(page_content="baz"), + } + ) + assert docsearch.docstore.__dict__ == expected_docstore.__dict__ + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def 
test_faiss_vector_sim() -> None: + """Test vector similarity.""" + texts = ["foo", "bar", "baz"] + docsearch = FAISS.from_texts(texts, FakeEmbeddings()) + index_to_id = docsearch.index_to_docstore_id + expected_docstore = InMemoryDocstore( + { + index_to_id[0]: Document(page_content="foo"), + index_to_id[1]: Document(page_content="bar"), + index_to_id[2]: Document(page_content="baz"), + } + ) + assert docsearch.docstore.__dict__ == expected_docstore.__dict__ + query_vec = FakeEmbeddings().embed_query(text="foo") + output = docsearch.similarity_search_by_vector(query_vec, k=1) + assert output == [Document(page_content="foo")] + + # make sure we can have k > docstore size + output = docsearch.max_marginal_relevance_search_by_vector(query_vec, k=10) + assert len(output) == len(texts) + + +def test_faiss_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) + expected_docstore = InMemoryDocstore( + { + docsearch.index_to_docstore_id[0]: Document( + page_content="foo", metadata={"page": 0} + ), + docsearch.index_to_docstore_id[1]: Document( + page_content="bar", metadata={"page": 1} + ), + docsearch.index_to_docstore_id[2]: Document( + page_content="baz", metadata={"page": 2} + ), + } + ) + assert docsearch.docstore.__dict__ == expected_docstore.__dict__ + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": 0})] + + +def test_faiss_search_not_found() -> None: + """Test what happens when document is not found.""" + texts = ["foo", "bar", "baz"] + docsearch = FAISS.from_texts(texts, FakeEmbeddings()) + # Get rid of the docstore to purposefully induce errors. + docsearch.docstore = InMemoryDocstore({}) + with pytest.raises(ValueError): + docsearch.similarity_search("foo") + + +def test_faiss_add_texts() -> None: + """Test end to end adding of texts.""" + # Create initial doc store. + texts = ["foo", "bar", "baz"] + docsearch = FAISS.from_texts(texts, FakeEmbeddings()) + # Test adding a similar document as before. 
+ docsearch.add_texts(["foo"]) + output = docsearch.similarity_search("foo", k=2) + assert output == [Document(page_content="foo"), Document(page_content="foo")] + + +def test_faiss_add_texts_not_supported() -> None: + """Test adding of texts to a docstore that doesn't support it.""" + docsearch = FAISS(FakeEmbeddings().embed_query, None, Wikipedia(), {}) + with pytest.raises(ValueError): + docsearch.add_texts(["foo"]) + + +def test_faiss_local_save_load() -> None: + """Test end to end serialization.""" + texts = ["foo", "bar", "baz"] + docsearch = FAISS.from_texts(texts, FakeEmbeddings()) + + with tempfile.NamedTemporaryFile() as temp_file: + docsearch.save_local(temp_file.name) + new_docsearch = FAISS.load_local(temp_file.name, FakeEmbeddings()) + assert new_docsearch.index is not None diff --git a/tests/integration_tests/vectorstores/test_milvus.py b/tests/integration_tests/vectorstores/test_milvus.py new file mode 100644 index 0000000000000000000000000000000000000000..063427e76639c4c812cc2b6a7a2b2d709e1023ba --- /dev/null +++ b/tests/integration_tests/vectorstores/test_milvus.py @@ -0,0 +1,53 @@ +"""Test Milvus functionality.""" +from typing import List, Optional + +from langchain.docstore.document import Document +from langchain.vectorstores import Milvus +from tests.integration_tests.vectorstores.fake_embeddings import ( + FakeEmbeddings, + fake_texts, +) + + +def _milvus_from_texts(metadatas: Optional[List[dict]] = None) -> Milvus: + return Milvus.from_texts( + fake_texts, + FakeEmbeddings(), + metadatas=metadatas, + connection_args={"host": "127.0.0.1", "port": "19530"}, + ) + + +def test_milvus() -> None: + """Test end to end construction and search.""" + docsearch = _milvus_from_texts() + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_milvus_with_score() -> None: + """Test end to end construction and search with scores and IDs.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + output = docsearch.similarity_search_with_score("foo", k=3) + docs = [o[0] for o in output] + scores = [o[1] for o in output] + assert docs == [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="bar", metadata={"page": 1}), + Document(page_content="baz", metadata={"page": 2}), + ] + assert scores[0] < scores[1] < scores[2] + + +def test_milvus_max_marginal_relevance_search() -> None: + """Test end to end construction and MRR search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3) + assert output == [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="baz", metadata={"page": 2}), + ] diff --git a/tests/integration_tests/vectorstores/test_opensearch.py b/tests/integration_tests/vectorstores/test_opensearch.py new file mode 100644 index 0000000000000000000000000000000000000000..efa1d9d7c356d69f245b39dc897e46873f81a347 --- /dev/null +++ b/tests/integration_tests/vectorstores/test_opensearch.py @@ -0,0 +1,128 @@ +"""Test OpenSearch functionality.""" + +import pytest + +from langchain.docstore.document import Document +from langchain.vectorstores.opensearch_vector_search import ( + PAINLESS_SCRIPTING_SEARCH, + SCRIPT_SCORING_SEARCH, + OpenSearchVectorSearch, +) +from tests.integration_tests.vectorstores.fake_embeddings import 
FakeEmbeddings + +DEFAULT_OPENSEARCH_URL = "http://localhost:9200" +texts = ["foo", "bar", "baz"] + + +def test_opensearch() -> None: + """Test end to end indexing and search using Approximate Search.""" + docsearch = OpenSearchVectorSearch.from_texts( + texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_opensearch_with_metadatas() -> None: + """Test end to end indexing and search with metadata.""" + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = OpenSearchVectorSearch.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + opensearch_url=DEFAULT_OPENSEARCH_URL, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": 0})] + + +def test_add_text() -> None: + """Test adding additional text elements to existing index.""" + text_input = ["test", "add", "text", "method"] + metadatas = [{"page": i} for i in range(len(text_input))] + docsearch = OpenSearchVectorSearch.from_texts( + texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL + ) + docids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas) + assert len(docids) == len(text_input) + + +def test_opensearch_script_scoring() -> None: + """Test end to end indexing and search using Script Scoring Search.""" + pre_filter_val = {"bool": {"filter": {"term": {"text": "bar"}}}} + docsearch = OpenSearchVectorSearch.from_texts( + texts, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + is_appx_search=False, + ) + output = docsearch.similarity_search( + "foo", k=1, search_type=SCRIPT_SCORING_SEARCH, pre_filter=pre_filter_val + ) + assert output == [Document(page_content="bar")] + + +def test_add_text_script_scoring() -> None: + """Test adding additional text elements and validating using Script Scoring.""" + text_input = ["test", "add", "text", "method"] + metadatas = [{"page": i} for i in range(len(text_input))] + docsearch = OpenSearchVectorSearch.from_texts( + text_input, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + is_appx_search=False, + ) + OpenSearchVectorSearch.add_texts(docsearch, texts, metadatas) + output = docsearch.similarity_search( + "add", k=1, search_type=SCRIPT_SCORING_SEARCH, space_type="innerproduct" + ) + assert output == [Document(page_content="test")] + + +def test_opensearch_painless_scripting() -> None: + """Test end to end indexing and search using Painless Scripting Search.""" + pre_filter_val = {"bool": {"filter": {"term": {"text": "baz"}}}} + docsearch = OpenSearchVectorSearch.from_texts( + texts, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + is_appx_search=False, + ) + output = docsearch.similarity_search( + "foo", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, pre_filter=pre_filter_val + ) + assert output == [Document(page_content="baz")] + + +def test_add_text_painless_scripting() -> None: + """Test adding additional text elements and validating using Painless Scripting.""" + text_input = ["test", "add", "text", "method"] + metadatas = [{"page": i} for i in range(len(text_input))] + docsearch = OpenSearchVectorSearch.from_texts( + text_input, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + is_appx_search=False, + ) + OpenSearchVectorSearch.add_texts(docsearch, texts, metadatas) + output = docsearch.similarity_search( + "add", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, space_type="cosineSimilarity" + ) + assert 
output == [Document(page_content="test")] + + +def test_opensearch_invalid_search_type() -> None: + """Test to validate similarity_search by providing invalid search_type.""" + docsearch = OpenSearchVectorSearch.from_texts( + texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL + ) + with pytest.raises(ValueError): + docsearch.similarity_search("foo", k=1, search_type="invalid_search_type") + + +def test_opensearch_embedding_size_zero() -> None: + """Test to validate indexing when embedding size is zero.""" + with pytest.raises(RuntimeError): + OpenSearchVectorSearch.from_texts( + [], FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL + ) diff --git a/tests/integration_tests/vectorstores/test_pgvector.py b/tests/integration_tests/vectorstores/test_pgvector.py new file mode 100644 index 0000000000000000000000000000000000000000..023d04d9ecb97856ef0c621bd11f0155f4e6fd7d --- /dev/null +++ b/tests/integration_tests/vectorstores/test_pgvector.py @@ -0,0 +1,151 @@ +"""Test PGVector functionality.""" +import os +from typing import List + +from sqlalchemy.orm import Session + +from langchain.docstore.document import Document +from langchain.vectorstores.pgvector import PGVector +from tests.integration_tests.vectorstores.fake_embeddings import ( + FakeEmbeddings, +) + +CONNECTION_STRING = PGVector.connection_string_from_db_params( + driver=os.environ.get("TEST_PGVECTOR_DRIVER", "psycopg2"), + host=os.environ.get("TEST_PGVECTOR_HOST", "localhost"), + port=int(os.environ.get("TEST_PGVECTOR_PORT", "5432")), + database=os.environ.get("TEST_PGVECTOR_DATABASE", "postgres"), + user=os.environ.get("TEST_PGVECTOR_USER", "postgres"), + password=os.environ.get("TEST_PGVECTOR_PASSWORD", "postgres"), +) + + +ADA_TOKEN_COUNT = 1536 + + +class FakeEmbeddingsWithAdaDimension(FakeEmbeddings): + """Fake embeddings functionality for testing.""" + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return simple embeddings.""" + return [ + [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(i)] for i in range(len(texts)) + ] + + def embed_query(self, text: str) -> List[float]: + """Return simple embeddings.""" + return [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(0.0)] + + +def test_pgvector() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection", + embedding=FakeEmbeddingsWithAdaDimension(), + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_pgvector_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection", + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": "0"})] + + +def test_pgvector_with_metadatas_with_scores() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection", + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + 
connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search_with_score("foo", k=1) + assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)] + + +def test_pgvector_with_filter_match() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection_filter", + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"}) + assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)] + + +def test_pgvector_with_filter_distant_match() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection_filter", + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"}) + assert output == [ + (Document(page_content="baz", metadata={"page": "2"}), 0.0013003906671379406) + ] + + +def test_pgvector_with_filter_no_match() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = PGVector.from_texts( + texts=texts, + collection_name="test_collection_filter", + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "5"}) + assert output == [] + + +def test_pgvector_collection_with_metadata() -> None: + """Test end to end collection construction""" + pgvector = PGVector( + collection_name="test_collection", + collection_metadata={"foo": "bar"}, + embedding_function=FakeEmbeddingsWithAdaDimension(), + connection_string=CONNECTION_STRING, + pre_delete_collection=True, + ) + session = Session(pgvector.connect()) + collection = pgvector.get_collection(session) + if collection is None: + assert False, "Expected a CollectionStore object but received None" + else: + assert collection.name == "test_collection" + assert collection.cmetadata == {"foo": "bar"} diff --git a/tests/integration_tests/vectorstores/test_pinecone.py b/tests/integration_tests/vectorstores/test_pinecone.py new file mode 100644 index 0000000000000000000000000000000000000000..bcfe4104dd10e036b9e8c79bfc81d7d9d731b81b --- /dev/null +++ b/tests/integration_tests/vectorstores/test_pinecone.py @@ -0,0 +1,97 @@ +"""Test Pinecone functionality.""" +import pinecone + +from langchain.docstore.document import Document +from langchain.vectorstores.pinecone import Pinecone +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + +pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV") + +# if the index already exists, delete it +try: + pinecone.delete_index("langchain-demo") +except Exception: + pass +index = pinecone.Index("langchain-demo") + + +def test_pinecone() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = Pinecone.from_texts( + texts, 
FakeEmbeddings(), index_name="langchain-demo", namespace="test" + ) + output = docsearch.similarity_search("foo", k=1, namespace="test") + assert output == [Document(page_content="foo")] + + +def test_pinecone_with_metadatas() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = Pinecone.from_texts( + texts, + FakeEmbeddings(), + index_name="langchain-demo", + metadatas=metadatas, + namespace="test-metadata", + ) + output = docsearch.similarity_search("foo", k=1, namespace="test-metadata") + assert output == [Document(page_content="foo", metadata={"page": 0})] + + +def test_pinecone_with_scores() -> None: + """Test end to end construction and search with scores and IDs.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = Pinecone.from_texts( + texts, + FakeEmbeddings(), + index_name="langchain-demo", + metadatas=metadatas, + namespace="test-metadata-score", + ) + output = docsearch.similarity_search_with_score( + "foo", k=3, namespace="test-metadata-score" + ) + docs = [o[0] for o in output] + scores = [o[1] for o in output] + assert docs == [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="bar", metadata={"page": 1}), + Document(page_content="baz", metadata={"page": 2}), + ] + assert scores[0] > scores[1] > scores[2] + + +def test_pinecone_with_namespaces() -> None: + """Test that namespaces are properly handled.""" + # Populate the same index under two different namespaces + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + Pinecone.from_texts( + texts, + FakeEmbeddings(), + index_name="langchain-demo", + metadatas=metadatas, + namespace="test-namespace", + ) + + texts = ["foo2", "bar2", "baz2"] + metadatas = [{"page": i} for i in range(len(texts))] + Pinecone.from_texts( + texts, + FakeEmbeddings(), + index_name="langchain-demo", + metadatas=metadatas, + namespace="test-namespace2", + ) + + # Search with namespace + docsearch = Pinecone.from_existing_index( + "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace" + ) + output = docsearch.similarity_search("foo", k=6) + # Check that we don't get results from the other namespace + page_contents = [o.page_content for o in output] + assert set(page_contents) == set(["foo", "bar", "baz"]) diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9498b7b36da72534ecc668bbbd9f8c4dc5087d --- /dev/null +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -0,0 +1,100 @@ +"""Test Qdrant functionality.""" +import pytest + +from langchain.docstore.document import Document +from langchain.vectorstores import Qdrant +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +@pytest.mark.parametrize( + ["content_payload_key", "metadata_payload_key"], + [ + (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY), + ("foo", "bar"), + (Qdrant.CONTENT_KEY, "bar"), + ("foo", Qdrant.METADATA_KEY), + ], +) +def test_qdrant(content_payload_key: str, metadata_payload_key: str) -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + host="localhost", + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + ) + output = 
docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +@pytest.mark.parametrize( + ["content_payload_key", "metadata_payload_key"], + [ + (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY), + ("test_content", "test_payload"), + (Qdrant.CONTENT_KEY, "payload_test"), + ("content_test", Qdrant.METADATA_KEY), + ], +) +def test_qdrant_with_metadatas( + content_payload_key: str, metadata_payload_key: str +) -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + host="localhost", + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={"page": 0})] + + +def test_qdrant_similarity_search_filters() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + host="localhost", + ) + output = docsearch.similarity_search("foo", k=1, filter={"page": 1}) + assert output == [Document(page_content="bar", metadata={"page": 1})] + + +@pytest.mark.parametrize( + ["content_payload_key", "metadata_payload_key"], + [ + (Qdrant.CONTENT_KEY, Qdrant.METADATA_KEY), + ("test_content", "test_payload"), + (Qdrant.CONTENT_KEY, "payload_test"), + ("content_test", Qdrant.METADATA_KEY), + ], +) +def test_qdrant_max_marginal_relevance_search( + content_payload_key: str, metadata_payload_key: str +) -> None: + """Test end to end construction and MRR search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + host="localhost", + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + ) + output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3) + assert output == [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="bar", metadata={"page": 1}), + ] diff --git a/tests/integration_tests/vectorstores/test_redis.py b/tests/integration_tests/vectorstores/test_redis.py new file mode 100644 index 0000000000000000000000000000000000000000..15d5651a7bc45ba4a3eae7e2e2c0e51a4611c23d --- /dev/null +++ b/tests/integration_tests/vectorstores/test_redis.py @@ -0,0 +1,26 @@ +"""Test Redis functionality.""" + +from langchain.docstore.document import Document +from langchain.vectorstores.redis import Redis +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + + +def test_redis() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + docsearch = Redis.from_texts( + texts, FakeEmbeddings(), redis_url="redis://localhost:6379" + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + +def test_redis_new_vector() -> None: + """Test adding a new document""" + texts = ["foo", "bar", "baz"] + docsearch = Redis.from_texts( + texts, FakeEmbeddings(), redis_url="redis://localhost:6379" + ) + docsearch.add_texts(["foo"]) + output = docsearch.similarity_search("foo", k=2) + assert output == [Document(page_content="foo"), Document(page_content="foo")] diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py new file 
mode 100644 index 0000000000000000000000000000000000000000..307b508544b0b8a8a044137a78f7c7a02b3bc5b7 --- /dev/null +++ b/tests/unit_tests/__init__.py @@ -0,0 +1 @@ +"""All unit tests (lightweight tests).""" diff --git a/tests/unit_tests/agents/__init__.py b/tests/unit_tests/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..06a0d862611b4fd4712a385e2dbe8242635dc899 --- /dev/null +++ b/tests/unit_tests/agents/__init__.py @@ -0,0 +1 @@ +"""Test agent functionality.""" diff --git a/tests/unit_tests/agents/test_agent.py b/tests/unit_tests/agents/test_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..e9d0831779e8aaafa5109e5ec82c18e52c9de862 --- /dev/null +++ b/tests/unit_tests/agents/test_agent.py @@ -0,0 +1,291 @@ +"""Unit tests for agents.""" + +from typing import Any, List, Mapping, Optional + +from pydantic import BaseModel + +from langchain.agents import AgentExecutor, initialize_agent +from langchain.agents.tools import Tool +from langchain.callbacks.base import CallbackManager +from langchain.llms.base import LLM +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +class FakeListLLM(LLM, BaseModel): + """Fake LLM for testing that outputs elements of a list.""" + + responses: List[str] + i: int = -1 + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Increment counter, and then return response in that index.""" + self.i += 1 + print(f"=== Mock Response #{self.i} ===") + print(self.responses[self.i]) + return self.responses[self.i] + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {} + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake_list" + + +def _get_agent(**kwargs: Any) -> AgentExecutor: + """Get agent for testing.""" + bad_action_name = "BadAction" + responses = [ + f"I'm turning evil\nAction: {bad_action_name}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + ), + Tool( + name="Lookup", + func=lambda x: x, + description="Useful for looking up things in a table", + ), + ] + agent = initialize_agent( + tools, fake_llm, agent="zero-shot-react-description", verbose=True, **kwargs + ) + return agent + + +def test_agent_bad_action() -> None: + """Test react chain when bad action given.""" + agent = _get_agent() + output = agent.run("when was langchain made") + assert output == "curses foiled again" + + +def test_agent_stopped_early() -> None: + """Test react chain when bad action given.""" + agent = _get_agent(max_iterations=0) + output = agent.run("when was langchain made") + assert output == "Agent stopped due to max iterations." 
+ + +def test_agent_with_callbacks_global() -> None: + """Test react chain with callbacks by setting verbose globally.""" + import langchain + + langchain.verbose = True + handler = FakeCallbackHandler() + manager = CallbackManager(handlers=[handler]) + tool = "Search" + responses = [ + f"FooBarBaz\nAction: {tool}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses, callback_manager=manager, verbose=True) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + callback_manager=manager, + ), + ] + agent = initialize_agent( + tools, + fake_llm, + agent="zero-shot-react-description", + verbose=True, + callback_manager=manager, + ) + + output = agent.run("when was langchain made") + assert output == "curses foiled again" + + # 1 top level chain run runs, 2 LLMChain runs, 2 LLM runs, 1 tool run + assert handler.chain_starts == handler.chain_ends == 3 + assert handler.llm_starts == handler.llm_ends == 2 + assert handler.tool_starts == 2 + assert handler.tool_ends == 1 + # 1 extra agent action + assert handler.starts == 7 + # 1 extra agent end + assert handler.ends == 7 + assert handler.errors == 0 + # during LLMChain + assert handler.text == 2 + + +def test_agent_with_callbacks_local() -> None: + """Test react chain with callbacks by setting verbose locally.""" + import langchain + + langchain.verbose = False + handler = FakeCallbackHandler() + manager = CallbackManager(handlers=[handler]) + tool = "Search" + responses = [ + f"FooBarBaz\nAction: {tool}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses, callback_manager=manager, verbose=True) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + callback_manager=manager, + ), + ] + agent = initialize_agent( + tools, + fake_llm, + agent="zero-shot-react-description", + verbose=True, + callback_manager=manager, + ) + + agent.agent.llm_chain.verbose = True + + output = agent.run("when was langchain made") + assert output == "curses foiled again" + + # 1 top level chain run, 2 LLMChain starts, 2 LLM runs, 1 tool run + assert handler.chain_starts == handler.chain_ends == 3 + assert handler.llm_starts == handler.llm_ends == 2 + assert handler.tool_starts == 2 + assert handler.tool_ends == 1 + # 1 extra agent action + assert handler.starts == 7 + # 1 extra agent end + assert handler.ends == 7 + assert handler.errors == 0 + # during LLMChain + assert handler.text == 2 + + +def test_agent_with_callbacks_not_verbose() -> None: + """Test react chain with callbacks but not verbose.""" + import langchain + + langchain.verbose = False + handler = FakeCallbackHandler() + manager = CallbackManager(handlers=[handler]) + tool = "Search" + responses = [ + f"FooBarBaz\nAction: {tool}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses, callback_manager=manager) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + ), + ] + agent = initialize_agent( + tools, + fake_llm, + agent="zero-shot-react-description", + callback_manager=manager, + ) + + output = agent.run("when was langchain made") + assert output == "curses foiled again" + + # 1 top level chain run, 2 LLMChain runs, 2 LLM runs, 1 tool run + assert handler.starts == 0 + assert handler.ends == 0 + 
assert handler.errors == 0 + + +def test_agent_tool_return_direct() -> None: + """Test agent using tools that return directly.""" + tool = "Search" + responses = [ + f"FooBarBaz\nAction: {tool}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + return_direct=True, + ), + ] + agent = initialize_agent( + tools, + fake_llm, + agent="zero-shot-react-description", + ) + + output = agent.run("when was langchain made") + assert output == "misalignment" + + +def test_agent_tool_return_direct_in_intermediate_steps() -> None: + """Test agent using tools that return directly.""" + tool = "Search" + responses = [ + f"FooBarBaz\nAction: {tool}\nAction Input: misalignment", + "Oh well\nAction: Final Answer\nAction Input: curses foiled again", + ] + fake_llm = FakeListLLM(responses=responses) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + return_direct=True, + ), + ] + agent = initialize_agent( + tools, + fake_llm, + agent="zero-shot-react-description", + return_intermediate_steps=True, + ) + + resp = agent("when was langchain made") + assert resp["output"] == "misalignment" + assert len(resp["intermediate_steps"]) == 1 + action, _action_input = resp["intermediate_steps"][0] + assert action.tool == "Search" + + +def test_agent_with_new_prefix_suffix() -> None: + """Test agent initialization kwargs with new prefix and suffix.""" + fake_llm = FakeListLLM( + responses=["FooBarBaz\nAction: Search\nAction Input: misalignment"] + ) + tools = [ + Tool( + name="Search", + func=lambda x: x, + description="Useful for searching", + return_direct=True, + ), + ] + prefix = "FooBarBaz" + + suffix = "Begin now!\nInput: {input}\nThought: {agent_scratchpad}" + + agent = initialize_agent( + tools=tools, + llm=fake_llm, + agent="zero-shot-react-description", + agent_kwargs={"prefix": prefix, "suffix": suffix}, + ) + + # avoids "BasePromptTemplate" has no attribute "template" error + assert hasattr(agent.agent.llm_chain.prompt, "template") + prompt_str = agent.agent.llm_chain.prompt.template + assert prompt_str.startswith(prefix), "Prompt does not start with prefix" + assert prompt_str.endswith(suffix), "Prompt does not end with suffix" diff --git a/tests/unit_tests/agents/test_mrkl.py b/tests/unit_tests/agents/test_mrkl.py new file mode 100644 index 0000000000000000000000000000000000000000..6c87f91453cf0de496b22160c82814facb751942 --- /dev/null +++ b/tests/unit_tests/agents/test_mrkl.py @@ -0,0 +1,110 @@ +"""Test MRKL functionality.""" + +import pytest + +from langchain.agents.mrkl.base import ZeroShotAgent, get_action_and_input +from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX +from langchain.agents.tools import Tool +from langchain.prompts import PromptTemplate +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_get_action_and_input() -> None: + """Test getting an action from text.""" + llm_output = ( + "Thought: I need to search for NBA\n" "Action: Search\n" "Action Input: NBA" + ) + action, action_input = get_action_and_input(llm_output) + assert action == "Search" + assert action_input == "NBA" + + +def test_get_action_and_input_whitespace() -> None: + """Test getting an action from text.""" + llm_output = "Thought: I need to search for NBA\nAction: Search \nAction Input: NBA" + action, action_input = get_action_and_input(llm_output) + 
assert action == "Search" + assert action_input == "NBA" + + +def test_get_final_answer() -> None: + """Test getting final answer.""" + llm_output = ( + "Thought: I need to search for NBA\n" + "Action: Search\n" + "Action Input: NBA\n" + "Observation: founded in 1994\n" + "Thought: I can now answer the question\n" + "Final Answer: 1994" + ) + action, action_input = get_action_and_input(llm_output) + assert action == "Final Answer" + assert action_input == "1994" + + +def test_get_final_answer_new_line() -> None: + """Test getting final answer.""" + llm_output = ( + "Thought: I need to search for NBA\n" + "Action: Search\n" + "Action Input: NBA\n" + "Observation: founded in 1994\n" + "Thought: I can now answer the question\n" + "Final Answer:\n1994" + ) + action, action_input = get_action_and_input(llm_output) + assert action == "Final Answer" + assert action_input == "1994" + + +def test_get_final_answer_multiline() -> None: + """Test getting final answer that is multiline.""" + llm_output = ( + "Thought: I need to search for NBA\n" + "Action: Search\n" + "Action Input: NBA\n" + "Observation: founded in 1994 and 1993\n" + "Thought: I can now answer the question\n" + "Final Answer: 1994\n1993" + ) + action, action_input = get_action_and_input(llm_output) + assert action == "Final Answer" + assert action_input == "1994\n1993" + + +def test_bad_action_input_line() -> None: + """Test handling when no action input found.""" + llm_output = "Thought: I need to search for NBA\n" "Action: Search\n" "Thought: NBA" + with pytest.raises(ValueError): + get_action_and_input(llm_output) + + +def test_bad_action_line() -> None: + """Test handling when no action input found.""" + llm_output = ( + "Thought: I need to search for NBA\n" "Thought: Search\n" "Action Input: NBA" + ) + with pytest.raises(ValueError): + get_action_and_input(llm_output) + + +def test_from_chains() -> None: + """Test initializing from chains.""" + chain_configs = [ + Tool(name="foo", func=lambda x: "foo", description="foobar1"), + Tool(name="bar", func=lambda x: "bar", description="foobar2"), + ] + agent = ZeroShotAgent.from_llm_and_tools(FakeLLM(), chain_configs) + expected_tools_prompt = "foo: foobar1\nbar: foobar2" + expected_tool_names = "foo, bar" + expected_template = "\n\n".join( + [ + PREFIX, + expected_tools_prompt, + FORMAT_INSTRUCTIONS.format(tool_names=expected_tool_names), + SUFFIX, + ] + ) + prompt = agent.llm_chain.prompt + assert isinstance(prompt, PromptTemplate) + assert prompt.template == expected_template diff --git a/tests/unit_tests/agents/test_react.py b/tests/unit_tests/agents/test_react.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd5238677fad28d0f42962654c8da0fce39792a --- /dev/null +++ b/tests/unit_tests/agents/test_react.py @@ -0,0 +1,107 @@ +"""Unit tests for ReAct.""" + +from typing import Any, List, Mapping, Optional, Union + +from pydantic import BaseModel + +from langchain.agents.react.base import ReActChain, ReActDocstoreAgent +from langchain.agents.tools import Tool +from langchain.docstore.base import Docstore +from langchain.docstore.document import Document +from langchain.llms.base import LLM +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import AgentAction + +_PAGE_CONTENT = """This is a page about LangChain. + +It is a really cool framework. + +What isn't there to love about langchain? 
+ +Made in 2022.""" + +_FAKE_PROMPT = PromptTemplate(input_variables=["input"], template="{input}") + + +class FakeListLLM(LLM, BaseModel): + """Fake LLM for testing that outputs elements of a list.""" + + responses: List[str] + i: int = -1 + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake_list" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Increment counter, and then return response in that index.""" + self.i += 1 + return self.responses[self.i] + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {} + + +class FakeDocstore(Docstore): + """Fake docstore for testing purposes.""" + + def search(self, search: str) -> Union[str, Document]: + """Return the fake document.""" + document = Document(page_content=_PAGE_CONTENT) + return document + + +def test_predict_until_observation_normal() -> None: + """Test predict_until_observation when observation is made normally.""" + outputs = ["foo\nAction 1: Search[foo]"] + fake_llm = FakeListLLM(responses=outputs) + tools = [ + Tool(name="Search", func=lambda x: x, description="foo"), + Tool(name="Lookup", func=lambda x: x, description="bar"), + ] + agent = ReActDocstoreAgent.from_llm_and_tools(fake_llm, tools) + output = agent.plan([], input="") + expected_output = AgentAction("Search", "foo", outputs[0]) + assert output == expected_output + + +def test_predict_until_observation_repeat() -> None: + """Test when no action is generated initially.""" + outputs = ["foo", " Search[foo]"] + fake_llm = FakeListLLM(responses=outputs) + tools = [ + Tool(name="Search", func=lambda x: x, description="foo"), + Tool(name="Lookup", func=lambda x: x, description="bar"), + ] + agent = ReActDocstoreAgent.from_llm_and_tools(fake_llm, tools) + output = agent.plan([], input="") + expected_output = AgentAction("Search", "foo", "foo\nAction 1: Search[foo]") + assert output == expected_output + + +def test_react_chain() -> None: + """Test react chain.""" + responses = [ + "I should probably search\nAction 1: Search[langchain]", + "I should probably lookup\nAction 2: Lookup[made]", + "Ah okay now I know the answer\nAction 3: Finish[2022]", + ] + fake_llm = FakeListLLM(responses=responses) + react_chain = ReActChain(llm=fake_llm, docstore=FakeDocstore()) + output = react_chain.run("when was langchain made") + assert output == "2022" + + +def test_react_chain_bad_action() -> None: + """Test react chain when bad action given.""" + bad_action_name = "BadAction" + responses = [ + f"I'm turning evil\nAction 1: {bad_action_name}[langchain]", + "Oh well\nAction 2: Finish[curses foiled again]", + ] + fake_llm = FakeListLLM(responses=responses) + react_chain = ReActChain(llm=fake_llm, docstore=FakeDocstore()) + output = react_chain.run("when was langchain made") + assert output == "curses foiled again" diff --git a/tests/unit_tests/agents/test_tools.py b/tests/unit_tests/agents/test_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..f01bfa330b01a1aa8e3a82b9862c8cbcb980b4bd --- /dev/null +++ b/tests/unit_tests/agents/test_tools.py @@ -0,0 +1,103 @@ +"""Test tool utils.""" +import pytest + +from langchain.agents.tools import Tool, tool + + +def test_unnamed_decorator() -> None: + """Test functionality with unnamed decorator.""" + + @tool + def search_api(query: str) -> str: + """Search the API for the query.""" + return "API result" + + assert isinstance(search_api, Tool) + assert search_api.name == "search_api" + assert not search_api.return_direct + assert 
search_api("test") == "API result" + + +def test_named_tool_decorator() -> None: + """Test functionality when arguments are provided as input to decorator.""" + + @tool("search") + def search_api(query: str) -> str: + """Search the API for the query.""" + return "API result" + + assert isinstance(search_api, Tool) + assert search_api.name == "search" + assert not search_api.return_direct + + +def test_named_tool_decorator_return_direct() -> None: + """Test functionality when arguments and return direct are provided as input.""" + + @tool("search", return_direct=True) + def search_api(query: str) -> str: + """Search the API for the query.""" + return "API result" + + assert isinstance(search_api, Tool) + assert search_api.name == "search" + assert search_api.return_direct + + +def test_unnamed_tool_decorator_return_direct() -> None: + """Test functionality when only return direct is provided.""" + + @tool(return_direct=True) + def search_api(query: str) -> str: + """Search the API for the query.""" + return "API result" + + assert isinstance(search_api, Tool) + assert search_api.name == "search_api" + assert search_api.return_direct + + +def test_missing_docstring() -> None: + """Test error is raised when docstring is missing.""" + # expect an AssertionError to be raised if there's no docstring + with pytest.raises(AssertionError): + + @tool + def search_api(query: str) -> str: + return "API result" + + +def test_create_tool_positional_args() -> None: + """Test that positional arguments are allowed.""" + test_tool = Tool("test_name", lambda x: x, "test_description") + assert test_tool("foo") == "foo" + assert test_tool.name == "test_name" + assert test_tool.description == "test_description" + + +def test_create_tool_keyword_args() -> None: + """Test that keyword arguments are allowed.""" + test_tool = Tool(name="test_name", func=lambda x: x, description="test_description") + assert test_tool("foo") == "foo" + assert test_tool.name == "test_name" + assert test_tool.description == "test_description" + + +@pytest.mark.asyncio +async def test_create_async_tool() -> None: + """Test that async tools are allowed.""" + + async def _test_func(x: str) -> str: + return x + + test_tool = Tool( + name="test_name", + func=lambda x: x, + description="test_description", + coroutine=_test_func, + ) + assert test_tool("foo") == "foo" + assert test_tool.name == "test_name" + assert test_tool.description == "test_description" + assert test_tool.coroutine is not None + assert await test_tool.arun("foo") == "foo" diff --git a/tests/unit_tests/callbacks/__init__.py b/tests/unit_tests/callbacks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cd34752b30997eb11a71a41080e1ccb01c9cae52 --- /dev/null +++ b/tests/unit_tests/callbacks/__init__.py @@ -0,0 +1 @@ +"""Tests for correct functioning of callbacks.""" diff --git a/tests/unit_tests/callbacks/fake_callback_handler.py b/tests/unit_tests/callbacks/fake_callback_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..921596e7563d486fc0ecbfe62be0b120d71d3892 --- /dev/null +++ b/tests/unit_tests/callbacks/fake_callback_handler.py @@ -0,0 +1,202 @@ +"""A fake callback handler for testing purposes.""" +from typing import Any, Dict, List, Union + +from pydantic import BaseModel + +from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler +from langchain.schema import AgentAction, AgentFinish, LLMResult + + +class BaseFakeCallbackHandler(BaseModel): + """Base fake callback handler for testing.""" + + 
starts: int = 0 + ends: int = 0 + errors: int = 0 + text: int = 0 + ignore_llm_: bool = False + ignore_chain_: bool = False + ignore_agent_: bool = False + always_verbose_: bool = False + + @property + def always_verbose(self) -> bool: + """Whether to call verbose callbacks even if verbose is False.""" + return self.always_verbose_ + + @property + def ignore_llm(self) -> bool: + """Whether to ignore LLM callbacks.""" + return self.ignore_llm_ + + @property + def ignore_chain(self) -> bool: + """Whether to ignore chain callbacks.""" + return self.ignore_chain_ + + @property + def ignore_agent(self) -> bool: + """Whether to ignore agent callbacks.""" + return self.ignore_agent_ + + # add finer-grained counters for easier debugging of failing tests + chain_starts: int = 0 + chain_ends: int = 0 + llm_starts: int = 0 + llm_ends: int = 0 + llm_streams: int = 0 + tool_starts: int = 0 + tool_ends: int = 0 + agent_ends: int = 0 + + +class FakeCallbackHandler(BaseFakeCallbackHandler, BaseCallbackHandler): + """Fake callback handler for testing.""" + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + self.llm_starts += 1 + self.starts += 1 + + def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run when LLM generates a new token.""" + self.llm_streams += 1 + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + self.llm_ends += 1 + self.ends += 1 + + def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when LLM errors.""" + self.errors += 1 + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + self.chain_starts += 1 + self.starts += 1 + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + self.chain_ends += 1 + self.ends += 1 + + def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + self.errors += 1 + + def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + self.tool_starts += 1 + self.starts += 1 + + def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + self.tool_ends += 1 + self.ends += 1 + + def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + self.errors += 1 + + def on_text(self, text: str, **kwargs: Any) -> None: + """Run when agent is ending.""" + self.text += 1 + + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run when agent ends running.""" + self.agent_ends += 1 + self.ends += 1 + + def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: + """Run on agent action.""" + self.tool_starts += 1 + self.starts += 1 + + +class FakeAsyncCallbackHandler(BaseFakeCallbackHandler, AsyncCallbackHandler): + """Fake async callback handler for testing.""" + + async def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """Run when LLM starts running.""" + self.llm_starts += 1 + self.starts += 1 + + async def on_llm_new_token(self, token: str, **kwargs: Any) -> None: + """Run when LLM generates a new token.""" + self.llm_streams += 1 + + async def on_llm_end(self, response: 
LLMResult, **kwargs: Any) -> None: + """Run when LLM ends running.""" + self.llm_ends += 1 + self.ends += 1 + + async def on_llm_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when LLM errors.""" + self.errors += 1 + + async def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Run when chain starts running.""" + self.chain_starts += 1 + self.starts += 1 + + async def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Run when chain ends running.""" + self.chain_ends += 1 + self.ends += 1 + + async def on_chain_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when chain errors.""" + self.errors += 1 + + async def on_tool_start( + self, serialized: Dict[str, Any], input_str: str, **kwargs: Any + ) -> None: + """Run when tool starts running.""" + self.tool_starts += 1 + self.starts += 1 + + async def on_tool_end(self, output: str, **kwargs: Any) -> None: + """Run when tool ends running.""" + self.tool_ends += 1 + self.ends += 1 + + async def on_tool_error( + self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any + ) -> None: + """Run when tool errors.""" + self.errors += 1 + + async def on_text(self, text: str, **kwargs: Any) -> None: + """Run when agent is ending.""" + self.text += 1 + + async def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None: + """Run when agent ends running.""" + self.agent_ends += 1 + self.ends += 1 + + async def on_agent_action(self, action: AgentAction, **kwargs: Any) -> None: + """Run on agent action.""" + self.tool_starts += 1 + self.starts += 1 diff --git a/tests/unit_tests/callbacks/test_callback_manager.py b/tests/unit_tests/callbacks/test_callback_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..7819798ca5ff8e4709612665fde6470382f51bbb --- /dev/null +++ b/tests/unit_tests/callbacks/test_callback_manager.py @@ -0,0 +1,180 @@ +"""Test CallbackManager.""" +from typing import Tuple + +import pytest + +from langchain.callbacks.base import ( + AsyncCallbackManager, + BaseCallbackManager, + CallbackManager, +) +from langchain.callbacks.shared import SharedCallbackManager +from langchain.schema import AgentFinish, LLMResult +from tests.unit_tests.callbacks.fake_callback_handler import ( + BaseFakeCallbackHandler, + FakeAsyncCallbackHandler, + FakeCallbackHandler, +) + + +def _test_callback_manager( + manager: BaseCallbackManager, *handlers: BaseFakeCallbackHandler +) -> None: + """Test the CallbackManager.""" + manager.on_llm_start({}, []) + manager.on_llm_end(LLMResult(generations=[])) + manager.on_llm_error(Exception()) + manager.on_chain_start({"name": "foo"}, {}) + manager.on_chain_end({}) + manager.on_chain_error(Exception()) + manager.on_tool_start({}, "") + manager.on_tool_end("") + manager.on_tool_error(Exception()) + manager.on_agent_finish(AgentFinish(log="", return_values={})) + _check_num_calls(handlers) + + +async def _test_callback_manager_async( + manager: AsyncCallbackManager, *handlers: BaseFakeCallbackHandler +) -> None: + """Test the CallbackManager.""" + await manager.on_llm_start({}, []) + await manager.on_llm_end(LLMResult(generations=[])) + await manager.on_llm_error(Exception()) + await manager.on_chain_start({"name": "foo"}, {}) + await manager.on_chain_end({}) + await manager.on_chain_error(Exception()) + await manager.on_tool_start({}, "") + await manager.on_tool_end("") + await manager.on_tool_error(Exception()) + 
await manager.on_agent_finish(AgentFinish(log="", return_values={})) + _check_num_calls(handlers) + + +def _check_num_calls(handlers: Tuple[BaseFakeCallbackHandler, ...]) -> None: + for handler in handlers: + if handler.always_verbose: + assert handler.starts == 3 + assert handler.ends == 4 + assert handler.errors == 3 + else: + assert handler.starts == 0 + assert handler.ends == 0 + assert handler.errors == 0 + + +def _test_callback_manager_pass_in_verbose( + manager: BaseCallbackManager, *handlers: FakeCallbackHandler +) -> None: + """Test the CallbackManager.""" + manager.on_llm_start({}, [], verbose=True) + manager.on_llm_end(LLMResult(generations=[]), verbose=True) + manager.on_llm_error(Exception(), verbose=True) + manager.on_chain_start({"name": "foo"}, {}, verbose=True) + manager.on_chain_end({}, verbose=True) + manager.on_chain_error(Exception(), verbose=True) + manager.on_tool_start({}, "", verbose=True) + manager.on_tool_end("", verbose=True) + manager.on_tool_error(Exception(), verbose=True) + manager.on_agent_finish(AgentFinish(log="", return_values={}), verbose=True) + for handler in handlers: + assert handler.starts == 3 + assert handler.ends == 4 + assert handler.errors == 3 + + +def test_callback_manager() -> None: + """Test the CallbackManager.""" + handler1 = FakeCallbackHandler(always_verbose_=True) + handler2 = FakeCallbackHandler(always_verbose_=False) + manager = CallbackManager([handler1, handler2]) + _test_callback_manager(manager, handler1, handler2) + + +def test_callback_manager_pass_in_verbose() -> None: + """Test the CallbackManager.""" + handler1 = FakeCallbackHandler() + handler2 = FakeCallbackHandler() + manager = CallbackManager([handler1, handler2]) + _test_callback_manager_pass_in_verbose(manager, handler1, handler2) + + +def test_ignore_llm() -> None: + """Test ignore llm param for callback handlers.""" + handler1 = FakeCallbackHandler(ignore_llm_=True, always_verbose_=True) + handler2 = FakeCallbackHandler(always_verbose_=True) + manager = CallbackManager(handlers=[handler1, handler2]) + manager.on_llm_start({}, [], verbose=True) + manager.on_llm_end(LLMResult(generations=[]), verbose=True) + manager.on_llm_error(Exception(), verbose=True) + assert handler1.starts == 0 + assert handler1.ends == 0 + assert handler1.errors == 0 + assert handler2.starts == 1 + assert handler2.ends == 1 + assert handler2.errors == 1 + + +def test_ignore_chain() -> None: + """Test ignore chain param for callback handlers.""" + handler1 = FakeCallbackHandler(ignore_chain_=True, always_verbose_=True) + handler2 = FakeCallbackHandler(always_verbose_=True) + manager = CallbackManager(handlers=[handler1, handler2]) + manager.on_chain_start({"name": "foo"}, {}, verbose=True) + manager.on_chain_end({}, verbose=True) + manager.on_chain_error(Exception(), verbose=True) + assert handler1.starts == 0 + assert handler1.ends == 0 + assert handler1.errors == 0 + assert handler2.starts == 1 + assert handler2.ends == 1 + assert handler2.errors == 1 + + +def test_ignore_agent() -> None: + """Test ignore agent param for callback handlers.""" + handler1 = FakeCallbackHandler(ignore_agent_=True, always_verbose_=True) + handler2 = FakeCallbackHandler(always_verbose_=True) + manager = CallbackManager(handlers=[handler1, handler2]) + manager.on_tool_start({}, "", verbose=True) + manager.on_tool_end("", verbose=True) + manager.on_tool_error(Exception(), verbose=True) + manager.on_agent_finish(AgentFinish({}, ""), verbose=True) + assert handler1.starts == 0 + assert handler1.ends == 0 + assert 
handler1.errors == 0 + assert handler2.starts == 1 + assert handler2.ends == 2 + assert handler2.errors == 1 + + +def test_shared_callback_manager() -> None: + """Test the SharedCallbackManager.""" + manager1 = SharedCallbackManager() + manager2 = SharedCallbackManager() + + assert manager1 is manager2 + + handler1 = FakeCallbackHandler(always_verbose_=True) + handler2 = FakeCallbackHandler() + manager1.add_handler(handler1) + manager2.add_handler(handler2) + _test_callback_manager(manager1, handler1, handler2) + + +@pytest.mark.asyncio +async def test_async_callback_manager() -> None: + """Test the AsyncCallbackManager.""" + handler1 = FakeAsyncCallbackHandler(always_verbose_=True) + handler2 = FakeAsyncCallbackHandler() + manager = AsyncCallbackManager([handler1, handler2]) + await _test_callback_manager_async(manager, handler1, handler2) + + +@pytest.mark.asyncio +async def test_async_callback_manager_sync_handler() -> None: + """Test the AsyncCallbackManager.""" + handler1 = FakeCallbackHandler(always_verbose_=True) + handler2 = FakeAsyncCallbackHandler() + manager = AsyncCallbackManager([handler1, handler2]) + await _test_callback_manager_async(manager, handler1, handler2) diff --git a/tests/unit_tests/callbacks/tracers/__init__.py b/tests/unit_tests/callbacks/tracers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb6b04283374cc9e50e3415313063e71d6a92ca2 --- /dev/null +++ b/tests/unit_tests/callbacks/tracers/__init__.py @@ -0,0 +1 @@ +"""Tests for correct functioning of tracers.""" diff --git a/tests/unit_tests/callbacks/tracers/test_tracer.py b/tests/unit_tests/callbacks/tracers/test_tracer.py new file mode 100644 index 0000000000000000000000000000000000000000..ab18d53e7ea60ec2eb86c53bf80f81d8eaf780dc --- /dev/null +++ b/tests/unit_tests/callbacks/tracers/test_tracer.py @@ -0,0 +1,522 @@ +"""Test Tracer classes.""" +from __future__ import annotations + +import threading +from datetime import datetime +from typing import List, Optional, Union + +import pytest +from freezegun import freeze_time + +from langchain.callbacks.tracers.base import ( + BaseTracer, + ChainRun, + LLMRun, + SharedTracer, + ToolRun, + Tracer, + TracerException, + TracerSession, +) +from langchain.callbacks.tracers.schemas import TracerSessionCreate +from langchain.schema import LLMResult + +TEST_SESSION_ID = 2023 + + +@freeze_time("2023-01-01") +def _get_compare_run() -> Union[LLMRun, ChainRun, ToolRun]: + return ChainRun( + id=None, + error=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + inputs={}, + outputs={}, + session_id=TEST_SESSION_ID, + child_runs=[ + ToolRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=2, + serialized={}, + tool_input="test", + output="test", + action="{}", + session_id=TEST_SESSION_ID, + error=None, + child_runs=[ + LLMRun( + id=None, + error=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=3, + serialized={}, + prompts=[], + response=LLMResult(generations=[[]]), + session_id=TEST_SESSION_ID, + ) + ], + ), + LLMRun( + id=None, + error=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=4, + serialized={}, + prompts=[], + response=LLMResult(generations=[[]]), + session_id=TEST_SESSION_ID, + ), + ], + ) + + +def _perform_nested_run(tracer: BaseTracer) -> None: + """Perform a nested run.""" + tracer.on_chain_start(serialized={}, 
inputs={}) + tracer.on_tool_start(serialized={}, input_str="test") + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + tracer.on_tool_end("test") + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + tracer.on_chain_end(outputs={}) + + +def _add_child_run( + parent_run: Union[ChainRun, ToolRun], + child_run: Union[LLMRun, ChainRun, ToolRun], +) -> None: + """Add child run to a chain run or tool run.""" + parent_run.child_runs.append(child_run) + + +def _generate_id() -> Optional[Union[int, str]]: + """Generate an id for a run.""" + return None + + +def load_session(session_name: str) -> TracerSession: + """Load a tracing session.""" + return TracerSession(id=1, name=session_name, start_time=datetime.utcnow()) + + +def _persist_session(session: TracerSessionCreate) -> TracerSession: + """Persist a tracing session.""" + return TracerSession(id=TEST_SESSION_ID, **session.dict()) + + +def load_default_session() -> TracerSession: + """Load a tracing session.""" + return TracerSession(id=1, name="default", start_time=datetime.utcnow()) + + +class FakeTracer(Tracer): + """Fake tracer that records LangChain execution.""" + + def __init__(self) -> None: + """Initialize the tracer.""" + super().__init__() + self.runs: List[Union[LLMRun, ChainRun, ToolRun]] = [] + + def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None: + """Persist a run.""" + self.runs.append(run) + + def _add_child_run( + self, + parent_run: Union[ChainRun, ToolRun], + child_run: Union[LLMRun, ChainRun, ToolRun], + ) -> None: + """Add child run to a chain run or tool run.""" + _add_child_run(parent_run, child_run) + + def _generate_id(self) -> Optional[Union[int, str]]: + """Generate an id for a run.""" + return _generate_id() + + def _persist_session(self, session: TracerSessionCreate) -> TracerSession: + """Persist a tracing session.""" + return _persist_session(session) + + def load_session(self, session_name: str) -> TracerSession: + """Load a tracing session.""" + return load_session(session_name) + + def load_default_session(self) -> TracerSession: + """Load a tracing session.""" + return load_default_session() + + +class FakeSharedTracer(SharedTracer): + """Fake shared tracer that records LangChain execution.""" + + runs: List[Union[LLMRun, ChainRun, ToolRun]] = [] + + def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None: + """Persist a run.""" + with self._lock: + self.runs.append(run) + + def remove_runs(self) -> None: + """Remove all runs.""" + with self._lock: + self.runs = [] + + def _add_child_run( + self, + parent_run: Union[ChainRun, ToolRun], + child_run: Union[LLMRun, ChainRun, ToolRun], + ) -> None: + """Add child run to a chain run or tool run.""" + _add_child_run(parent_run, child_run) + + def _generate_id(self) -> Optional[Union[int, str]]: + """Generate an id for a run.""" + return _generate_id() + + def _persist_session(self, session: TracerSessionCreate) -> TracerSession: + """Persist a tracing session.""" + return _persist_session(session) + + def load_session(self, session_name: str) -> TracerSession: + """Load a tracing session.""" + return load_session(session_name) + + def load_default_session(self) -> TracerSession: + """Load a tracing session.""" + return load_default_session() + + +@freeze_time("2023-01-01") +def test_tracer_llm_run() -> None: + """Test tracer on an LLM run.""" + compare_run = LLMRun( + id=None, + start_time=datetime.utcnow(), + 
end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + prompts=[], + response=LLMResult(generations=[[]]), + session_id=TEST_SESSION_ID, + error=None, + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_llm_run_errors_no_session() -> None: + """Test tracer on an LLM run without a session.""" + tracer = FakeTracer() + + with pytest.raises(TracerException): + tracer.on_llm_start(serialized={}, prompts=[]) + + +@freeze_time("2023-01-01") +def test_tracer_llm_run_errors_no_start() -> None: + """Test tracer on an LLM run without a start.""" + tracer = FakeTracer() + + tracer.new_session() + with pytest.raises(TracerException): + tracer.on_llm_end(response=LLMResult(generations=[[]])) + + +@freeze_time("2023-01-01") +def test_tracer_multiple_llm_runs() -> None: + """Test the tracer with multiple runs.""" + compare_run = LLMRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + prompts=[], + response=LLMResult(generations=[[]]), + session_id=TEST_SESSION_ID, + error=None, + ) + tracer = FakeTracer() + + tracer.new_session() + num_runs = 10 + for _ in range(num_runs): + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + + assert tracer.runs == [compare_run] * num_runs + + +@freeze_time("2023-01-01") +def test_tracer_chain_run() -> None: + """Test tracer on a Chain run.""" + compare_run = ChainRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + inputs={}, + outputs={}, + session_id=TEST_SESSION_ID, + error=None, + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_chain_start(serialized={}, inputs={}) + tracer.on_chain_end(outputs={}) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_tool_run() -> None: + """Test tracer on a Tool run.""" + compare_run = ToolRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + tool_input="test", + output="test", + action="{}", + session_id=TEST_SESSION_ID, + error=None, + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_tool_start(serialized={}, input_str="test") + tracer.on_tool_end("test") + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_nested_run() -> None: + """Test tracer on a nested run.""" + tracer = FakeTracer() + tracer.new_session() + _perform_nested_run(tracer) + assert tracer.runs == [_get_compare_run()] + + +@freeze_time("2023-01-01") +def test_tracer_llm_run_on_error() -> None: + """Test tracer on an LLM run with an error.""" + exception = Exception("test") + + compare_run = LLMRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + prompts=[], + response=None, + session_id=TEST_SESSION_ID, + error=repr(exception), + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_error(exception) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_chain_run_on_error() -> None: + """Test tracer on a Chain run with an error.""" + exception = Exception("test") + + compare_run = ChainRun( + id=None, + 
start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + inputs={}, + outputs=None, + session_id=TEST_SESSION_ID, + error=repr(exception), + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_chain_start(serialized={}, inputs={}) + tracer.on_chain_error(exception) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_tool_run_on_error() -> None: + """Test tracer on a Tool run with an error.""" + exception = Exception("test") + + compare_run = ToolRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + tool_input="test", + output=None, + action="{}", + session_id=TEST_SESSION_ID, + error=repr(exception), + ) + tracer = FakeTracer() + + tracer.new_session() + tracer.on_tool_start(serialized={}, input_str="test") + tracer.on_tool_error(exception) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +def test_tracer_nested_runs_on_error() -> None: + """Test tracer on a nested run with an error.""" + exception = Exception("test") + + tracer = FakeTracer() + tracer.new_session() + + for _ in range(3): + tracer.on_chain_start(serialized={}, inputs={}) + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_end(response=LLMResult(generations=[[]])) + tracer.on_tool_start(serialized={}, input_str="test") + tracer.on_llm_start(serialized={}, prompts=[]) + tracer.on_llm_error(exception) + tracer.on_tool_error(exception) + tracer.on_chain_error(exception) + + compare_run = ChainRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=1, + serialized={}, + session_id=TEST_SESSION_ID, + error=repr(exception), + inputs={}, + outputs=None, + child_runs=[ + LLMRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=2, + serialized={}, + session_id=TEST_SESSION_ID, + error=None, + prompts=[], + response=LLMResult(generations=[[]], llm_output=None), + ), + LLMRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=3, + serialized={}, + session_id=TEST_SESSION_ID, + error=None, + prompts=[], + response=LLMResult(generations=[[]], llm_output=None), + ), + ToolRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=4, + serialized={}, + session_id=TEST_SESSION_ID, + error=repr(exception), + tool_input="test", + output=None, + action="{}", + child_runs=[ + LLMRun( + id=None, + start_time=datetime.utcnow(), + end_time=datetime.utcnow(), + extra={}, + execution_order=5, + serialized={}, + session_id=TEST_SESSION_ID, + error=repr(exception), + prompts=[], + response=None, + ) + ], + child_llm_runs=[], + child_chain_runs=[], + child_tool_runs=[], + ), + ], + child_llm_runs=[], + child_chain_runs=[], + child_tool_runs=[], + ) + + assert tracer.runs == [compare_run] * 3 + + +@freeze_time("2023-01-01") +def test_shared_tracer_nested_run() -> None: + """Test shared tracer on a nested run.""" + tracer = FakeSharedTracer() + tracer.new_session() + tracer.remove_runs() + _perform_nested_run(tracer) + assert tracer.runs == [_get_compare_run()] + + +@freeze_time("2023-01-01") +def test_shared_tracer_nested_run_multithreaded() -> None: + """Test shared tracer on a nested run.""" + tracer = FakeSharedTracer() + 
tracer.remove_runs() + tracer.new_session() + threads = [] + num_threads = 10 + for _ in range(num_threads): + thread = threading.Thread(target=_perform_nested_run, args=(tracer,)) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + assert tracer.runs == [_get_compare_run()] * num_threads diff --git a/tests/unit_tests/chains/__init__.py b/tests/unit_tests/chains/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1765c676c2b7cad794fd2c6b54ea7014c352e4a --- /dev/null +++ b/tests/unit_tests/chains/__init__.py @@ -0,0 +1 @@ +"""Tests for correct functioning of chains.""" diff --git a/tests/unit_tests/chains/test_api.py b/tests/unit_tests/chains/test_api.py new file mode 100644 index 0000000000000000000000000000000000000000..8749d100e9a2f1d9a47fe9f8340bcddf040a0c3d --- /dev/null +++ b/tests/unit_tests/chains/test_api.py @@ -0,0 +1,85 @@ +"""Test LLM Math functionality.""" + +import json + +import pytest + +from langchain import LLMChain +from langchain.chains.api.base import APIChain +from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT +from langchain.requests import RequestsWrapper +from tests.unit_tests.llms.fake_llm import FakeLLM + + +class FakeRequestsChain(RequestsWrapper): + """Fake requests chain just for testing purposes.""" + + output: str + + def get(self, url: str) -> str: + """Just return the specified output.""" + return self.output + + +@pytest.fixture +def test_api_data() -> dict: + """Fake api data to use for testing.""" + api_docs = """ + This API endpoint will search the notes for a user. + + Endpoint: https://thisapidoesntexist.com + GET /api/notes + + Query parameters: + q | string | The search term for notes + """ + return { + "api_docs": api_docs, + "question": "Search for notes containing langchain", + "api_url": "https://thisapidoesntexist.com/api/notes?q=langchain", + "api_response": json.dumps( + { + "success": True, + "results": [{"id": 1, "content": "Langchain is awesome!"}], + } + ), + "api_summary": "There is 1 note about langchain.", + } + + +@pytest.fixture +def fake_llm_api_chain(test_api_data: dict) -> APIChain: + """Fake LLM API chain for testing.""" + TEST_API_DOCS = test_api_data["api_docs"] + TEST_QUESTION = test_api_data["question"] + TEST_URL = test_api_data["api_url"] + TEST_API_RESPONSE = test_api_data["api_response"] + TEST_API_SUMMARY = test_api_data["api_summary"] + + api_url_query_prompt = API_URL_PROMPT.format( + api_docs=TEST_API_DOCS, question=TEST_QUESTION + ) + api_response_prompt = API_RESPONSE_PROMPT.format( + api_docs=TEST_API_DOCS, + question=TEST_QUESTION, + api_url=TEST_URL, + api_response=TEST_API_RESPONSE, + ) + queries = {api_url_query_prompt: TEST_URL, api_response_prompt: TEST_API_SUMMARY} + fake_llm = FakeLLM(queries=queries) + api_request_chain = LLMChain(llm=fake_llm, prompt=API_URL_PROMPT) + api_answer_chain = LLMChain(llm=fake_llm, prompt=API_RESPONSE_PROMPT) + requests_wrapper = FakeRequestsChain(output=TEST_API_RESPONSE) + return APIChain( + api_request_chain=api_request_chain, + api_answer_chain=api_answer_chain, + requests_wrapper=requests_wrapper, + api_docs=TEST_API_DOCS, + ) + + +def test_api_question(fake_llm_api_chain: APIChain, test_api_data: dict) -> None: + """Test simple question that needs API access.""" + question = test_api_data["question"] + output = fake_llm_api_chain.run(question) + assert output == test_api_data["api_summary"] diff --git a/tests/unit_tests/chains/test_base.py b/tests/unit_tests/chains/test_base.py new 
file mode 100644 index 0000000000000000000000000000000000000000..f24fcaf0577a7e64202864624e3acbfd346c1efd --- /dev/null +++ b/tests/unit_tests/chains/test_base.py @@ -0,0 +1,168 @@ +"""Test logic on base chain class.""" +from typing import Any, Dict, List, Optional + +import pytest +from pydantic import BaseModel + +from langchain.callbacks.base import CallbackManager +from langchain.chains.base import Chain +from langchain.schema import BaseMemory +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +class FakeMemory(BaseMemory, BaseModel): + """Fake memory class for testing purposes.""" + + @property + def memory_variables(self) -> List[str]: + """Return baz variable.""" + return ["baz"] + + def load_memory_variables( + self, inputs: Optional[Dict[str, Any]] = None + ) -> Dict[str, str]: + """Return baz variable.""" + return {"baz": "foo"} + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Pass.""" + pass + + def clear(self) -> None: + """Pass.""" + pass + + +class FakeChain(Chain, BaseModel): + """Fake chain class for testing purposes.""" + + be_correct: bool = True + the_input_keys: List[str] = ["foo"] + the_output_keys: List[str] = ["bar"] + + @property + def input_keys(self) -> List[str]: + """Input keys.""" + return self.the_input_keys + + @property + def output_keys(self) -> List[str]: + """Output key of bar.""" + return self.the_output_keys + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + if self.be_correct: + return {"bar": "baz"} + else: + return {"baz": "bar"} + + +def test_bad_inputs() -> None: + """Test errors are raised if input keys are not found.""" + chain = FakeChain() + with pytest.raises(ValueError): + chain({"foobar": "baz"}) + + +def test_bad_outputs() -> None: + """Test errors are raised if outputs keys are not found.""" + chain = FakeChain(be_correct=False) + with pytest.raises(ValueError): + chain({"foo": "baz"}) + + +def test_correct_call() -> None: + """Test correct call of fake chain.""" + chain = FakeChain() + output = chain({"foo": "bar"}) + assert output == {"foo": "bar", "bar": "baz"} + + +def test_single_input_correct() -> None: + """Test passing single input works.""" + chain = FakeChain() + output = chain("bar") + assert output == {"foo": "bar", "bar": "baz"} + + +def test_single_input_error() -> None: + """Test passing single input errors as expected.""" + chain = FakeChain(the_input_keys=["foo", "bar"]) + with pytest.raises(ValueError): + chain("bar") + + +def test_run_single_arg() -> None: + """Test run method with single arg.""" + chain = FakeChain() + output = chain.run("bar") + assert output == "baz" + + +def test_run_multiple_args_error() -> None: + """Test run method with multiple args errors as expected.""" + chain = FakeChain() + with pytest.raises(ValueError): + chain.run("bar", "foo") + + +def test_run_kwargs() -> None: + """Test run method with kwargs.""" + chain = FakeChain(the_input_keys=["foo", "bar"]) + output = chain.run(foo="bar", bar="foo") + assert output == "baz" + + +def test_run_kwargs_error() -> None: + """Test run method with kwargs errors as expected.""" + chain = FakeChain(the_input_keys=["foo", "bar"]) + with pytest.raises(ValueError): + chain.run(foo="bar", baz="foo") + + +def test_run_args_and_kwargs_error() -> None: + """Test run method with args and kwargs.""" + chain = FakeChain(the_input_keys=["foo", "bar"]) + with pytest.raises(ValueError): + chain.run("bar", foo="bar") + + +def test_multiple_output_keys_error() -> None: + """Test 
run with multiple output keys errors as expected.""" + chain = FakeChain(the_output_keys=["foo", "bar"]) + with pytest.raises(ValueError): + chain.run("bar") + + +def test_run_arg_with_memory() -> None: + """Test run method works when arg is passed.""" + chain = FakeChain(the_input_keys=["foo", "baz"], memory=FakeMemory()) + chain.run("bar") + + +def test_run_with_callback() -> None: + """Test run method works when callback manager is passed.""" + handler = FakeCallbackHandler() + chain = FakeChain( + callback_manager=CallbackManager(handlers=[handler]), verbose=True + ) + output = chain.run("bar") + assert output == "baz" + assert handler.starts == 1 + assert handler.ends == 1 + assert handler.errors == 0 + + +def test_run_with_callback_not_verbose() -> None: + """Test run method works when callback manager is passed and not verbose.""" + import langchain + + langchain.verbose = False + + handler = FakeCallbackHandler() + chain = FakeChain(callback_manager=CallbackManager(handlers=[handler])) + output = chain.run("bar") + assert output == "baz" + assert handler.starts == 0 + assert handler.ends == 0 + assert handler.errors == 0 diff --git a/tests/unit_tests/chains/test_combine_documents.py b/tests/unit_tests/chains/test_combine_documents.py new file mode 100644 index 0000000000000000000000000000000000000000..fca09f4ab40b50d307adadddf5e71f21e06b520c --- /dev/null +++ b/tests/unit_tests/chains/test_combine_documents.py @@ -0,0 +1,118 @@ +"""Test functionality related to combining documents.""" + +from typing import Any, List, Tuple + +import pytest + +from langchain.chains.combine_documents.map_reduce import ( + _collapse_docs, + _split_list_of_docs, +) +from langchain.docstore.document import Document + + +def _fake_docs_len_func(docs: List[Document]) -> int: + return len(_fake_combine_docs_func(docs)[0]) + + +def _fake_combine_docs_func(docs: List[Document], **kwargs: Any) -> Tuple[str, dict]: + return "".join([d.page_content for d in docs]), {} + + +def test__split_list_long_single_doc() -> None: + """Test splitting of a long single doc.""" + docs = [Document(page_content="foo" * 100)] + with pytest.raises(ValueError): + _split_list_of_docs(docs, _fake_docs_len_func, 100) + + +def test__split_list_long_pair_doc() -> None: + """Test splitting of a list with two medium docs.""" + docs = [Document(page_content="foo" * 30)] * 2 + with pytest.raises(ValueError): + _split_list_of_docs(docs, _fake_docs_len_func, 100) + + +def test__split_list_single_doc() -> None: + """Test splitting works with just a single doc.""" + docs = [Document(page_content="foo")] + doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 100) + assert doc_list == [docs] + + +def test__split_list_double_doc() -> None: + """Test splitting works with just two docs.""" + docs = [Document(page_content="foo"), Document(page_content="bar")] + doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 100) + assert doc_list == [docs] + + +def test__split_list_works_correctly() -> None: + """Test splitting works correctly.""" + docs = [ + Document(page_content="foo"), + Document(page_content="bar"), + Document(page_content="baz"), + Document(page_content="foo" * 2), + Document(page_content="bar"), + Document(page_content="baz"), + ] + doc_list = _split_list_of_docs(docs, _fake_docs_len_func, 10) + expected_result = [ + # Test a group of three. + [ + Document(page_content="foo"), + Document(page_content="bar"), + Document(page_content="baz"), + ], + # Test a group of two, where one is bigger. 
+ [Document(page_content="foo" * 2), Document(page_content="bar")], + # Test no errors on last + [Document(page_content="baz")], + ] + assert doc_list == expected_result + + +def test__collapse_docs_no_metadata() -> None: + """Test collapse documents functionality when no metadata.""" + docs = [ + Document(page_content="foo"), + Document(page_content="bar"), + Document(page_content="baz"), + ] + output = _collapse_docs(docs, _fake_combine_docs_func) + expected_output = Document(page_content="foobarbaz") + assert output == expected_output + + +def test__collapse_docs_one_doc() -> None: + """Test collapse documents functionality when only one document present.""" + # Test with no metadata. + docs = [Document(page_content="foo")] + output = _collapse_docs(docs, _fake_combine_docs_func) + assert output == docs[0] + + # Test with metadata. + docs = [Document(page_content="foo", metadata={"source": "a"})] + output = _collapse_docs(docs, _fake_combine_docs_func) + assert output == docs[0] + + +def test__collapse_docs_metadata() -> None: + """Test collapse documents functionality when metadata exists.""" + metadata1 = {"source": "a", "foo": 2, "bar": "1", "extra1": "foo"} + metadata2 = {"source": "b", "foo": "3", "bar": 2, "extra2": "bar"} + docs = [ + Document(page_content="foo", metadata=metadata1), + Document(page_content="bar", metadata=metadata2), + ] + output = _collapse_docs(docs, _fake_combine_docs_func) + expected_metadata = { + "source": "a, b", + "foo": "2, 3", + "bar": "1, 2", + "extra1": "foo", + "extra2": "bar", + } + expected_output = Document(page_content="foobar", metadata=expected_metadata) + assert output == expected_output diff --git a/tests/unit_tests/chains/test_constitutional_ai.py b/tests/unit_tests/chains/test_constitutional_ai.py new file mode 100644 index 0000000000000000000000000000000000000000..f8459d61a974c196f9986d549ed84cbdb881061c --- /dev/null +++ b/tests/unit_tests/chains/test_constitutional_ai.py @@ -0,0 +1,26 @@ +"""Unit tests for the Constitutional AI chain.""" +from langchain.chains.constitutional_ai.base import ConstitutionalChain + +TEXT_ONE = """ This text is bad. + +Revision request: Make it better. + +Revision:""" + +TEXT_TWO = """ This text is bad.\n\n""" + +TEXT_THREE = """ This text is bad. + +Revision request: Make it better. + +Revision: Better text""" + + +def test_critique_parsing() -> None: + """Test parsing of critique text.""" + for text in [TEXT_ONE, TEXT_TWO, TEXT_THREE]: + critique = ConstitutionalChain._parse_critique(text) + + assert ( + critique.strip() == "This text is bad." 
+ ), f"Failed on {text} with {critique}" diff --git a/tests/unit_tests/chains/test_conversation.py b/tests/unit_tests/chains/test_conversation.py new file mode 100644 index 0000000000000000000000000000000000000000..42ebcd28d1967abdc047457281fe114bc04df61a --- /dev/null +++ b/tests/unit_tests/chains/test_conversation.py @@ -0,0 +1,102 @@ +"""Test conversation chain and memory.""" +import pytest + +from langchain.chains.conversation.base import ConversationChain +from langchain.memory.buffer import ConversationBufferMemory +from langchain.memory.buffer_window import ConversationBufferWindowMemory +from langchain.memory.summary import ConversationSummaryMemory +from langchain.prompts.prompt import PromptTemplate +from langchain.schema import BaseMemory +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_memory_ai_prefix() -> None: + """Test that ai_prefix in the memory component works.""" + memory = ConversationBufferMemory(memory_key="foo", ai_prefix="Assistant") + memory.save_context({"input": "bar"}, {"output": "foo"}) + assert memory.buffer == "Human: bar\nAssistant: foo" + + +def test_memory_human_prefix() -> None: + """Test that human_prefix in the memory component works.""" + memory = ConversationBufferMemory(memory_key="foo", human_prefix="Friend") + memory.save_context({"input": "bar"}, {"output": "foo"}) + assert memory.buffer == "Friend: bar\nAI: foo" + + +def test_conversation_chain_works() -> None: + """Test that conversation chain works in basic setting.""" + llm = FakeLLM() + prompt = PromptTemplate(input_variables=["foo", "bar"], template="{foo} {bar}") + memory = ConversationBufferMemory(memory_key="foo") + chain = ConversationChain(llm=llm, prompt=prompt, memory=memory, input_key="bar") + chain.run("foo") + + +def test_conversation_chain_errors_bad_prompt() -> None: + """Test that conversation chain raises an error with a bad prompt.""" + llm = FakeLLM() + prompt = PromptTemplate(input_variables=[], template="nothing here") + with pytest.raises(ValueError): + ConversationChain(llm=llm, prompt=prompt) + + +def test_conversation_chain_errors_bad_variable() -> None: + """Test that conversation chain raises an error with a bad variable.""" + llm = FakeLLM() + prompt = PromptTemplate(input_variables=["foo"], template="{foo}") + memory = ConversationBufferMemory(memory_key="foo") + with pytest.raises(ValueError): + ConversationChain(llm=llm, prompt=prompt, memory=memory, input_key="foo") + + +@pytest.mark.parametrize( + "memory", + [ + ConversationBufferMemory(memory_key="baz"), + ConversationBufferWindowMemory(memory_key="baz"), + ConversationSummaryMemory(llm=FakeLLM(), memory_key="baz"), + ], +) +def test_conversation_memory(memory: BaseMemory) -> None: + """Test basic conversation memory functionality.""" + # This is a good input because the input is not the same as baz. + good_inputs = {"foo": "bar", "baz": "foo"} + # This is a good output because there is one variable. + good_outputs = {"bar": "foo"} + memory.save_context(good_inputs, good_outputs) + # This is a bad input because there are two variables that aren't the same as baz. + bad_inputs = {"foo": "bar", "foo1": "bar"} + with pytest.raises(ValueError): + memory.save_context(bad_inputs, good_outputs) + # This is a bad input because the only variable is the same as baz. + bad_inputs = {"baz": "bar"} + with pytest.raises(ValueError): + memory.save_context(bad_inputs, good_outputs) + # This is a bad output because it is empty. 
+ with pytest.raises(ValueError): + memory.save_context(good_inputs, {}) + # This is a bad output because there are two keys. + bad_outputs = {"foo": "bar", "foo1": "bar"} + with pytest.raises(ValueError): + memory.save_context(good_inputs, bad_outputs) + + +@pytest.mark.parametrize( + "memory", + [ + ConversationBufferMemory(memory_key="baz"), + ConversationSummaryMemory(llm=FakeLLM(), memory_key="baz"), + ConversationBufferWindowMemory(memory_key="baz"), + ], +) +def test_clearing_conversation_memory(memory: BaseMemory) -> None: + """Test clearing the conversation memory.""" + # This is a good input because the input is not the same as baz. + good_inputs = {"foo": "bar", "baz": "foo"} + # This is a good output because there is one variable. + good_outputs = {"bar": "foo"} + memory.save_context(good_inputs, good_outputs) + + memory.clear() + assert memory.load_memory_variables({}) == {"baz": ""} diff --git a/tests/unit_tests/chains/test_hyde.py b/tests/unit_tests/chains/test_hyde.py new file mode 100644 index 0000000000000000000000000000000000000000..fd7f3d61893e47f26d4aee89aa31de4b9d452e7c --- /dev/null +++ b/tests/unit_tests/chains/test_hyde.py @@ -0,0 +1,62 @@ +"""Test HyDE.""" +from typing import List, Optional + +import numpy as np +from pydantic import BaseModel + +from langchain.chains.hyde.base import HypotheticalDocumentEmbedder +from langchain.chains.hyde.prompts import PROMPT_MAP +from langchain.embeddings.base import Embeddings +from langchain.llms.base import BaseLLM +from langchain.schema import Generation, LLMResult + + +class FakeEmbeddings(Embeddings): + """Fake embedding class for tests.""" + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return random floats.""" + return [list(np.random.uniform(0, 1, 10)) for _ in range(10)] + + def embed_query(self, text: str) -> List[float]: + """Return random floats.""" + return list(np.random.uniform(0, 1, 10)) + + +class FakeLLM(BaseLLM, BaseModel): + """Fake LLM wrapper for testing purposes.""" + + n: int = 1 + + def _generate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + return LLMResult(generations=[[Generation(text="foo") for _ in range(self.n)]]) + + async def _agenerate( + self, prompts: List[str], stop: Optional[List[str]] = None + ) -> LLMResult: + return LLMResult(generations=[[Generation(text="foo") for _ in range(self.n)]]) + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake" + + +def test_hyde_from_llm() -> None: + """Test loading HyDE from all prompts.""" + for key in PROMPT_MAP: + embedding = HypotheticalDocumentEmbedder.from_llm( + FakeLLM(), FakeEmbeddings(), key + ) + embedding.embed_query("foo") + + +def test_hyde_from_llm_with_multiple_n() -> None: + """Test loading HyDE from all prompts.""" + for key in PROMPT_MAP: + embedding = HypotheticalDocumentEmbedder.from_llm( + FakeLLM(n=8), FakeEmbeddings(), key + ) + embedding.embed_query("foo") diff --git a/tests/unit_tests/chains/test_llm.py b/tests/unit_tests/chains/test_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..66b42e70330bb5555f41d54f75e9aa9f403fffbf --- /dev/null +++ b/tests/unit_tests/chains/test_llm.py @@ -0,0 +1,71 @@ +"""Test LLM chain.""" +from tempfile import TemporaryDirectory +from typing import Dict, List, Union +from unittest.mock import patch + +import pytest + +from langchain.chains.llm import LLMChain +from langchain.chains.loading import load_chain +from langchain.prompts.prompt import PromptTemplate +from 
langchain.schema import BaseOutputParser +from tests.unit_tests.llms.fake_llm import FakeLLM + + +class FakeOutputParser(BaseOutputParser): + """Fake output parser class for testing.""" + + def parse(self, text: str) -> Union[str, List[str], Dict[str, str]]: + """Parse by splitting.""" + return text.split() + + +@pytest.fixture +def fake_llm_chain() -> LLMChain: + """Fake LLM chain for testing purposes.""" + prompt = PromptTemplate(input_variables=["bar"], template="This is a {bar}:") + return LLMChain(prompt=prompt, llm=FakeLLM(), output_key="text1") + + +@patch("langchain.llms.loading.type_to_cls_dict", {"fake": FakeLLM}) +def test_serialization(fake_llm_chain: LLMChain) -> None: + """Test serialization.""" + with TemporaryDirectory() as temp_dir: + file = temp_dir + "/llm.json" + fake_llm_chain.save(file) + loaded_chain = load_chain(file) + assert loaded_chain == fake_llm_chain + + +def test_missing_inputs(fake_llm_chain: LLMChain) -> None: + """Test error is raised if inputs are missing.""" + with pytest.raises(ValueError): + fake_llm_chain({"foo": "bar"}) + + +def test_valid_call(fake_llm_chain: LLMChain) -> None: + """Test valid call of LLM chain.""" + output = fake_llm_chain({"bar": "baz"}) + assert output == {"bar": "baz", "text1": "foo"} + + # Test with stop words. + output = fake_llm_chain({"bar": "baz", "stop": ["foo"]}) + # Response should be `bar` now. + assert output == {"bar": "baz", "stop": ["foo"], "text1": "bar"} + + +def test_predict_method(fake_llm_chain: LLMChain) -> None: + """Test predict method works.""" + output = fake_llm_chain.predict(bar="baz") + assert output == "foo" + + +def test_predict_and_parse() -> None: + """Test parsing ability.""" + prompt = PromptTemplate( + input_variables=["foo"], template="{foo}", output_parser=FakeOutputParser() + ) + llm = FakeLLM(queries={"foo": "foo bar"}) + chain = LLMChain(prompt=prompt, llm=llm) + output = chain.predict_and_parse(foo="foo") + assert output == ["foo", "bar"] diff --git a/tests/unit_tests/chains/test_llm_bash.py b/tests/unit_tests/chains/test_llm_bash.py new file mode 100644 index 0000000000000000000000000000000000000000..3c2d9ae77623aac535802714c4d676654cc35d8e --- /dev/null +++ b/tests/unit_tests/chains/test_llm_bash.py @@ -0,0 +1,24 @@ +"""Test LLM Bash functionality.""" + +import pytest + +from langchain.chains.llm_bash.base import LLMBashChain +from langchain.chains.llm_bash.prompt import _PROMPT_TEMPLATE +from tests.unit_tests.llms.fake_llm import FakeLLM + + +@pytest.fixture +def fake_llm_bash_chain() -> LLMBashChain: + """Fake LLM Bash chain for testing.""" + question = "Please write a bash script that prints 'Hello World' to the console." + prompt = _PROMPT_TEMPLATE.format(question=question) + queries = {prompt: "```bash\nexpr 1 + 1\n```"} + fake_llm = FakeLLM(queries=queries) + return LLMBashChain(llm=fake_llm, input_key="q", output_key="a") + + +def test_simple_question(fake_llm_bash_chain: LLMBashChain) -> None: + """Test simple question that should not need python.""" + question = "Please write a bash script that prints 'Hello World' to the console." 
+ output = fake_llm_bash_chain.run(question) + assert output == "2\n" diff --git a/tests/unit_tests/chains/test_llm_checker.py b/tests/unit_tests/chains/test_llm_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..0c9b9343550a9897fa62da4ba1fded2168bd9a8c --- /dev/null +++ b/tests/unit_tests/chains/test_llm_checker.py @@ -0,0 +1,43 @@ +# flake8: noqa E501 + +"""Test LLMCheckerChain functionality.""" + +import pytest + +from langchain.chains.llm_checker.base import LLMCheckerChain +from langchain.chains.llm_checker.prompt import ( + _CHECK_ASSERTIONS_TEMPLATE, + _CREATE_DRAFT_ANSWER_TEMPLATE, + _LIST_ASSERTIONS_TEMPLATE, + _REVISED_ANSWER_TEMPLATE, +) +from tests.unit_tests.llms.fake_llm import FakeLLM + + +@pytest.fixture +def fake_llm_checker_chain() -> LLMCheckerChain: + """Fake LLMCheckerChain for testing.""" + queries = { + _CREATE_DRAFT_ANSWER_TEMPLATE.format( + question="Which mammal lays the biggest eggs?" + ): "I don't know which mammal layers the biggest eggs.", + _LIST_ASSERTIONS_TEMPLATE.format( + statement="I don't know which mammal layers the biggest eggs.", + ): "1) I know that mammals lay eggs.\n2) I know that birds lay eggs.\n3) I know that birds are mammals.", + _CHECK_ASSERTIONS_TEMPLATE.format( + assertions="1) I know that mammals lay eggs.\n2) I know that birds lay eggs.\n3) I know that birds are mammals.", + ): "1) I know that mammals lay eggs. TRUE\n2) I know that birds lay eggs. TRUE\n3) I know that birds are mammals. TRUE", + _REVISED_ANSWER_TEMPLATE.format( + checked_assertions="1) I know that mammals lay eggs. TRUE\n2) I know that birds lay eggs. TRUE\n3) I know that birds are mammals. TRUE", + question="Which mammal lays the biggest eggs?", + ): "I still don't know.", + } + fake_llm = FakeLLM(queries=queries) + return LLMCheckerChain(llm=fake_llm, input_key="q", output_key="a") + + +def test_simple_question(fake_llm_checker_chain: LLMCheckerChain) -> None: + """Test simple question that should not need python.""" + question = "Which mammal lays the biggest eggs?" + output = fake_llm_checker_chain.run(question) + assert output == "I still don't know." diff --git a/tests/unit_tests/chains/test_llm_math.py b/tests/unit_tests/chains/test_llm_math.py new file mode 100644 index 0000000000000000000000000000000000000000..b38d89dd2a05323a03cecfbeef38d827dfea408d --- /dev/null +++ b/tests/unit_tests/chains/test_llm_math.py @@ -0,0 +1,40 @@ +"""Test LLM Math functionality.""" + +import pytest + +from langchain.chains.llm_math.base import LLMMathChain +from langchain.chains.llm_math.prompt import _PROMPT_TEMPLATE +from tests.unit_tests.llms.fake_llm import FakeLLM + + +@pytest.fixture +def fake_llm_math_chain() -> LLMMathChain: + """Fake LLM Math chain for testing.""" + complex_question = _PROMPT_TEMPLATE.format(question="What is the square root of 2?") + queries = { + _PROMPT_TEMPLATE.format(question="What is 1 plus 1?"): "Answer: 2", + complex_question: "```python\nprint(2**.5)\n```", + _PROMPT_TEMPLATE.format(question="foo"): "foo", + } + fake_llm = FakeLLM(queries=queries) + return LLMMathChain(llm=fake_llm, input_key="q", output_key="a") + + +def test_simple_question(fake_llm_math_chain: LLMMathChain) -> None: + """Test simple question that should not need python.""" + question = "What is 1 plus 1?" 
+ output = fake_llm_math_chain.run(question) + assert output == "Answer: 2" + + +def test_complex_question(fake_llm_math_chain: LLMMathChain) -> None: + """Test complex question that should need python.""" + question = "What is the square root of 2?" + output = fake_llm_math_chain.run(question) + assert output == f"Answer: {2**.5}\n" + + +def test_error(fake_llm_math_chain: LLMMathChain) -> None: + """Test question that raises error.""" + with pytest.raises(ValueError): + fake_llm_math_chain.run("foo") diff --git a/tests/unit_tests/chains/test_llm_summarization_checker.py b/tests/unit_tests/chains/test_llm_summarization_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..81e4a8fa1c64563fda72e9e25a9fa822e7afa8eb --- /dev/null +++ b/tests/unit_tests/chains/test_llm_summarization_checker.py @@ -0,0 +1,44 @@ +# flake8: noqa E501 + +"""Test LLMSummarization functionality.""" + +import pytest + +from langchain.chains.llm_summarization_checker.base import ( + ARE_ALL_TRUE_PROMPT, + CHECK_ASSERTIONS_PROMPT, + CREATE_ASSERTIONS_PROMPT, + REVISED_SUMMARY_PROMPT, + LLMSummarizationCheckerChain, +) +from tests.unit_tests.llms.fake_llm import FakeLLM + + +@pytest.fixture +def fake_llm_summarization_checker_chain() -> LLMSummarizationCheckerChain: + """Fake LLMCheckerChain for testing.""" + queries = { + CREATE_ASSERTIONS_PROMPT.format( + summary="a", + ): "b", + CHECK_ASSERTIONS_PROMPT.format( + assertions="b", + ): "- b - True", + REVISED_SUMMARY_PROMPT.format( + checked_assertions="- b - True", summary="a" + ): "b", + ARE_ALL_TRUE_PROMPT.format( + checked_assertions="- b - True", + ): "True", + } + fake_llm = FakeLLM(queries=queries) + return LLMSummarizationCheckerChain(llm=fake_llm, input_key="q", output_key="a") + + +def test_simple_text( + fake_llm_summarization_checker_chain: LLMSummarizationCheckerChain, +) -> None: + """Test simple question that should not need python.""" + question = "a" + output = fake_llm_summarization_checker_chain.run(question) + assert output == "b" diff --git a/tests/unit_tests/chains/test_memory.py b/tests/unit_tests/chains/test_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..d727358ca917406005d8ab753a04922ac4bd6e06 --- /dev/null +++ b/tests/unit_tests/chains/test_memory.py @@ -0,0 +1,37 @@ +import pytest + +from langchain.chains.conversation.memory import ( + ConversationBufferMemory, + ConversationBufferWindowMemory, + ConversationSummaryMemory, +) +from langchain.memory import ReadOnlySharedMemory, SimpleMemory +from langchain.schema import BaseMemory +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_simple_memory() -> None: + """Test SimpleMemory.""" + memory = SimpleMemory(memories={"baz": "foo"}) + + output = memory.load_memory_variables({}) + + assert output == {"baz": "foo"} + assert ["baz"] == memory.memory_variables + + +@pytest.mark.parametrize( + "memory", + [ + ConversationBufferMemory(memory_key="baz"), + ConversationSummaryMemory(llm=FakeLLM(), memory_key="baz"), + ConversationBufferWindowMemory(memory_key="baz"), + ], +) +def test_readonly_memory(memory: BaseMemory) -> None: + read_only_memory = ReadOnlySharedMemory(memory=memory) + memory.save_context({"input": "bar"}, {"output": "foo"}) + + assert read_only_memory.load_memory_variables({}) == memory.load_memory_variables( + {} + ) diff --git a/tests/unit_tests/chains/test_natbot.py b/tests/unit_tests/chains/test_natbot.py new file mode 100644 index 
0000000000000000000000000000000000000000..0beaa409cedd18d2e7feaacebe835e906b113c05 --- /dev/null +++ b/tests/unit_tests/chains/test_natbot.py @@ -0,0 +1,50 @@ +"""Test functionality related to natbot.""" + +from typing import Any, List, Mapping, Optional + +from pydantic import BaseModel + +from langchain.chains.natbot.base import NatBotChain +from langchain.llms.base import LLM + + +class FakeLLM(LLM, BaseModel): + """Fake LLM wrapper for testing purposes.""" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """Return `foo` if the prompt is longer than 10000 characters, else `bar`.""" + if len(prompt) > 10000: + return "foo" + else: + return "bar" + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake" + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {} + + +def test_proper_inputs() -> None: + """Test that natbot shortens inputs correctly.""" + nat_bot_chain = NatBotChain(llm=FakeLLM(), objective="testing") + url = "foo" * 10000 + browser_content = "foo" * 10000 + output = nat_bot_chain.execute(url, browser_content) + assert output == "bar" + + +def test_variable_key_naming() -> None: + """Test that natbot handles variable key naming correctly.""" + nat_bot_chain = NatBotChain( + llm=FakeLLM(), + objective="testing", + input_url_key="u", + input_browser_content_key="b", + output_key="c", + ) + output = nat_bot_chain.execute("foo", "foo") + assert output == "bar" diff --git a/tests/unit_tests/chains/test_sequential.py b/tests/unit_tests/chains/test_sequential.py new file mode 100644 index 0000000000000000000000000000000000000000..74947f9f783ee9696c4f04afefefc6ebc656f112 --- /dev/null +++ b/tests/unit_tests/chains/test_sequential.py @@ -0,0 +1,154 @@ +"""Test pipeline functionality.""" +from typing import Dict, List + +import pytest +from pydantic import BaseModel + +from langchain.chains.base import Chain +from langchain.chains.sequential import SequentialChain, SimpleSequentialChain +from langchain.memory.simple import SimpleMemory + + +class FakeChain(Chain, BaseModel): + """Fake Chain for testing purposes.""" + + input_variables: List[str] + output_variables: List[str] + + @property + def input_keys(self) -> List[str]: + """Input keys this chain expects.""" + return self.input_variables + + @property + def output_keys(self) -> List[str]: + """Output keys this chain returns.""" + return self.output_variables + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + outputs = {} + for var in self.output_variables: + variables = [inputs[k] for k in self.input_variables] + outputs[var] = f"{' '.join(variables)}foo" + return outputs + + +def test_sequential_usage_single_inputs() -> None: + """Test sequential on single input chains.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + chain = SequentialChain(chains=[chain_1, chain_2], input_variables=["foo"]) + output = chain({"foo": "123"}) + expected_output = {"baz": "123foofoo", "foo": "123"} + assert output == expected_output + + +def test_sequential_usage_multiple_inputs() -> None: + """Test sequential on multiple input chains.""" + chain_1 = FakeChain(input_variables=["foo", "test"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar", "foo"], output_variables=["baz"]) + chain = SequentialChain(chains=[chain_1, chain_2], input_variables=["foo", "test"]) + output = chain({"foo": "123", "test": "456"}) + expected_output = { + "baz": "123 456foo
123foo", + "foo": "123", + "test": "456", + } + assert output == expected_output + + +def test_sequential_usage_memory() -> None: + """Test sequential usage with memory.""" + memory = SimpleMemory(memories={"zab": "rab"}) + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + chain = SequentialChain( + memory=memory, chains=[chain_1, chain_2], input_variables=["foo"] + ) + output = chain({"foo": "123"}) + expected_output = {"baz": "123foofoo", "foo": "123", "zab": "rab"} + assert output == expected_output + + +def test_sequential_usage_multiple_outputs() -> None: + """Test sequential usage on multiple output chains.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar", "test"]) + chain_2 = FakeChain(input_variables=["bar", "foo"], output_variables=["baz"]) + chain = SequentialChain(chains=[chain_1, chain_2], input_variables=["foo"]) + output = chain({"foo": "123"}) + expected_output = { + "baz": "123foo 123foo", + "foo": "123", + } + assert output == expected_output + + +def test_sequential_missing_inputs() -> None: + """Test error is raised when input variables are missing.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar", "test"], output_variables=["baz"]) + with pytest.raises(ValueError): + # Also needs "test" as an input + SequentialChain(chains=[chain_1, chain_2], input_variables=["foo"]) + + +def test_sequential_bad_outputs() -> None: + """Test error is raised when bad outputs are specified.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + with pytest.raises(ValueError): + # "test" is not present as an output variable. 
+ SequentialChain( + chains=[chain_1, chain_2], + input_variables=["foo"], + output_variables=["test"], + ) + + +def test_sequential_valid_outputs() -> None: + """Test chain runs when valid outputs are specified.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + chain = SequentialChain( + chains=[chain_1, chain_2], + input_variables=["foo"], + output_variables=["bar", "baz"], + ) + output = chain({"foo": "123"}, return_only_outputs=True) + expected_output = {"baz": "123foofoo", "bar": "123foo"} + assert output == expected_output + + +def test_sequential_overlapping_inputs() -> None: + """Test error is raised when input variables are overlapping.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar", "test"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + with pytest.raises(ValueError): + # "test" is specified as an input, but also is an output of one step + SequentialChain(chains=[chain_1, chain_2], input_variables=["foo", "test"]) + + +def test_simple_sequential_functionality() -> None: + """Test simple sequential functionality.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + chain = SimpleSequentialChain(chains=[chain_1, chain_2]) + output = chain({"input": "123"}) + expected_output = {"output": "123foofoo", "input": "123"} + assert output == expected_output + + +def test_multi_input_errors() -> None: + """Test simple sequential errors if multiple input variables are expected.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar"]) + chain_2 = FakeChain(input_variables=["bar", "foo"], output_variables=["baz"]) + with pytest.raises(ValueError): + SimpleSequentialChain(chains=[chain_1, chain_2]) + + +def test_multi_output_errors() -> None: + """Test simple sequential errors if multiple output variables are expected.""" + chain_1 = FakeChain(input_variables=["foo"], output_variables=["bar", "grok"]) + chain_2 = FakeChain(input_variables=["bar"], output_variables=["baz"]) + with pytest.raises(ValueError): + SimpleSequentialChain(chains=[chain_1, chain_2]) diff --git a/tests/unit_tests/chains/test_transform.py b/tests/unit_tests/chains/test_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..a4dbca25de1316841061ff514bf41e8ba566abe6 --- /dev/null +++ b/tests/unit_tests/chains/test_transform.py @@ -0,0 +1,40 @@ +"""Test transform chain.""" +from typing import Dict + +import pytest + +from langchain.chains.transform import TransformChain + + +def dummy_transform(inputs: Dict[str, str]) -> Dict[str, str]: + """Transform a dummy input for tests.""" + outputs = inputs + outputs["greeting"] = f"{inputs['first_name']} {inputs['last_name']} says hello" + del outputs["first_name"] + del outputs["last_name"] + return outputs + + +def test_tranform_chain() -> None: + """Test basic transform chain.""" + transform_chain = TransformChain( + input_variables=["first_name", "last_name"], + output_variables=["greeting"], + transform=dummy_transform, + ) + input_dict = {"first_name": "Leroy", "last_name": "Jenkins"} + response = transform_chain(input_dict) + expected_response = {"greeting": "Leroy Jenkins says hello"} + assert response == expected_response + + +def test_transform_chain_bad_inputs() -> None: + """Test basic transform chain.""" + transform_chain = TransformChain( + input_variables=["first_name", 
"last_name"], + output_variables=["greeting"], + transform=dummy_transform, + ) + input_dict = {"name": "Leroy", "last_name": "Jenkins"} + with pytest.raises(ValueError): + _ = transform_chain(input_dict) diff --git a/tests/unit_tests/data/prompt_file.txt b/tests/unit_tests/data/prompt_file.txt new file mode 100644 index 0000000000000000000000000000000000000000..0681c36f48e27c2b7580ae9ad8aa6611648242f8 --- /dev/null +++ b/tests/unit_tests/data/prompt_file.txt @@ -0,0 +1,2 @@ +Question: {question} +Answer: \ No newline at end of file diff --git a/tests/unit_tests/data/prompts/prompt_extra_args.json b/tests/unit_tests/data/prompts/prompt_extra_args.json new file mode 100644 index 0000000000000000000000000000000000000000..4bfc4fdcc4be603c284f477ea37abb9be9343e2e --- /dev/null +++ b/tests/unit_tests/data/prompts/prompt_extra_args.json @@ -0,0 +1,5 @@ +{ + "input_variables": ["foo"], + "template": "This is a {foo} test.", + "bad_var": 1 +} \ No newline at end of file diff --git a/tests/unit_tests/data/prompts/prompt_missing_args.json b/tests/unit_tests/data/prompts/prompt_missing_args.json new file mode 100644 index 0000000000000000000000000000000000000000..cb69d843e7ac552be4a6beffa6c7ca87eb64de31 --- /dev/null +++ b/tests/unit_tests/data/prompts/prompt_missing_args.json @@ -0,0 +1,3 @@ +{ + "input_variables": ["foo"] +} \ No newline at end of file diff --git a/tests/unit_tests/data/prompts/simple_prompt.json b/tests/unit_tests/data/prompts/simple_prompt.json new file mode 100644 index 0000000000000000000000000000000000000000..d0f72b1c14f60dfbcaf2ac3c1935fad930af33aa --- /dev/null +++ b/tests/unit_tests/data/prompts/simple_prompt.json @@ -0,0 +1,4 @@ +{ + "input_variables": ["foo"], + "template": "This is a {foo} test." +} \ No newline at end of file diff --git a/tests/unit_tests/docstore/__init__.py b/tests/unit_tests/docstore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0744b34d078c3ff6f4a2ce69ccdc1e1f1b7025f0 --- /dev/null +++ b/tests/unit_tests/docstore/__init__.py @@ -0,0 +1 @@ +"""Test functionality related to the docstore objects.""" diff --git a/tests/unit_tests/docstore/test_document.py b/tests/unit_tests/docstore/test_document.py new file mode 100644 index 0000000000000000000000000000000000000000..03946c08535e8d25b4b56986907049f1b1412fc7 --- /dev/null +++ b/tests/unit_tests/docstore/test_document.py @@ -0,0 +1,59 @@ +"""Test document functionality.""" +from langchain.docstore.document import Document + +_PAGE_CONTENT = """This is a page about LangChain. + +It is a really cool framework. + +What isn't there to love about langchain? + +Made in 2022.""" + + +def test_document_summary() -> None: + """Test that we extract the summary okay.""" + page = Document(page_content=_PAGE_CONTENT) + assert page.summary == "This is a page about LangChain." + + +def test_document_lookup() -> None: + """Test that can lookup things okay.""" + page = Document(page_content=_PAGE_CONTENT) + + # Start with lookup on "LangChain". + output = page.lookup("LangChain") + assert output == "(Result 1/2) This is a page about LangChain." + + # Now switch to looking up "framework". + output = page.lookup("framework") + assert output == "(Result 1/1) It is a really cool framework." + + # Now switch back to looking up "LangChain", should reset. + output = page.lookup("LangChain") + assert output == "(Result 1/2) This is a page about LangChain." + + # Lookup "LangChain" again, should go to the next mention. 
+ output = page.lookup("LangChain") + assert output == "(Result 2/2) What isn't there to love about langchain?" + + +def test_document_lookups_dont_exist() -> None: + """Test lookup on term that doesn't exist in the document.""" + page = Document(page_content=_PAGE_CONTENT) + + # Start with lookup on "harrison". + output = page.lookup("harrison") + assert output == "No Results" + + +def test_document_lookups_too_many() -> None: + """Test lookup on term too many times.""" + page = Document(page_content=_PAGE_CONTENT) + + # Start with lookup on "framework". + output = page.lookup("framework") + assert output == "(Result 1/1) It is a really cool framework." + + # Now try again, should be exhausted. + output = page.lookup("framework") + assert output == "No More Results" diff --git a/tests/unit_tests/docstore/test_inmemory.py b/tests/unit_tests/docstore/test_inmemory.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe9104c22d45dd4c46b87ffb8c97977af5135df --- /dev/null +++ b/tests/unit_tests/docstore/test_inmemory.py @@ -0,0 +1,56 @@ +"""Test in memory docstore.""" +import pytest + +from langchain.docstore.document import Document +from langchain.docstore.in_memory import InMemoryDocstore + + +def test_document_found() -> None: + """Test document found.""" + _dict = {"foo": Document(page_content="bar")} + docstore = InMemoryDocstore(_dict) + output = docstore.search("foo") + assert isinstance(output, Document) + assert output.page_content == "bar" + + +def test_document_not_found() -> None: + """Test when document is not found.""" + _dict = {"foo": Document(page_content="bar")} + docstore = InMemoryDocstore(_dict) + output = docstore.search("bar") + assert output == "ID bar not found." + + +def test_adding_document() -> None: + """Test that documents are added correctly.""" + _dict = {"foo": Document(page_content="bar")} + docstore = InMemoryDocstore(_dict) + new_dict = {"bar": Document(page_content="foo")} + docstore.add(new_dict) + + # Test that you can find new document. + foo_output = docstore.search("bar") + assert isinstance(foo_output, Document) + assert foo_output.page_content == "foo" + + # Test that old document is the same. + bar_output = docstore.search("foo") + assert isinstance(bar_output, Document) + assert bar_output.page_content == "bar" + + +def test_adding_document_already_exists() -> None: + """Test that error is raised if document id already exists.""" + _dict = {"foo": Document(page_content="bar")} + docstore = InMemoryDocstore(_dict) + new_dict = {"foo": Document(page_content="foo")} + + # Test that error is raised. + with pytest.raises(ValueError): + docstore.add(new_dict) + + # Test that old document is the same. 
+ bar_output = docstore.search("foo") + assert isinstance(bar_output, Document) + assert bar_output.page_content == "bar" diff --git a/tests/unit_tests/document_loader/__init__.py b/tests/unit_tests/document_loader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit_tests/document_loader/test_youtube.py b/tests/unit_tests/document_loader/test_youtube.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit_tests/llms/__init__.py b/tests/unit_tests/llms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..95bd682b9acffa611081c9dd0e624e1e8cca17a6 --- /dev/null +++ b/tests/unit_tests/llms/__init__.py @@ -0,0 +1 @@ +"""All unit tests for LLM objects.""" diff --git a/tests/unit_tests/llms/fake_llm.py b/tests/unit_tests/llms/fake_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8b3462f00c4ff250112198513e27c541bdfc2c --- /dev/null +++ b/tests/unit_tests/llms/fake_llm.py @@ -0,0 +1,30 @@ +"""Fake LLM wrapper for testing purposes.""" +from typing import Any, List, Mapping, Optional + +from pydantic import BaseModel + +from langchain.llms.base import LLM + + +class FakeLLM(LLM, BaseModel): + """Fake LLM wrapper for testing purposes.""" + + queries: Optional[Mapping] = None + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "fake" + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + """First try to lookup in queries, else return 'foo' or 'bar'.""" + if self.queries is not None: + return self.queries[prompt] + if stop is None: + return "foo" + else: + return "bar" + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {} diff --git a/tests/unit_tests/llms/test_base.py b/tests/unit_tests/llms/test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..55ce2c3243a8fa9392374af045740bfe5dd1b3c6 --- /dev/null +++ b/tests/unit_tests/llms/test_base.py @@ -0,0 +1,75 @@ +"""Test base LLM functionality.""" +from sqlalchemy import Column, Integer, Sequence, String, create_engine + +try: + from sqlalchemy.orm import declarative_base +except ImportError: + from sqlalchemy.ext.declarative import declarative_base + +import langchain +from langchain.cache import InMemoryCache, SQLAlchemyCache +from langchain.schema import Generation, LLMResult +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_caching() -> None: + """Test caching behavior.""" + langchain.llm_cache = InMemoryCache() + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + langchain.llm_cache.update("foo", llm_string, [Generation(text="fizz")]) + output = llm.generate(["foo", "bar", "foo"]) + expected_cache_output = [Generation(text="foo")] + cache_output = langchain.llm_cache.lookup("bar", llm_string) + assert cache_output == expected_cache_output + langchain.llm_cache = None + expected_generations = [ + [Generation(text="fizz")], + [Generation(text="foo")], + [Generation(text="fizz")], + ] + expected_output = LLMResult( + generations=expected_generations, + llm_output=None, + ) + assert output == expected_output + + +def test_custom_caching() -> None: + """Test custom_caching behavior.""" + Base = declarative_base() + + class FulltextLLMCache(Base): # type: ignore + """Postgres table for fulltext-indexed LLM Cache.""" + + __tablename__ = 
"llm_cache_fulltext" + id = Column(Integer, Sequence("cache_id"), primary_key=True) + prompt = Column(String, nullable=False) + llm = Column(String, nullable=False) + idx = Column(Integer) + response = Column(String) + + engine = create_engine("sqlite://") + langchain.llm_cache = SQLAlchemyCache(engine, FulltextLLMCache) + llm = FakeLLM() + params = llm.dict() + params["stop"] = None + llm_string = str(sorted([(k, v) for k, v in params.items()])) + langchain.llm_cache.update("foo", llm_string, [Generation(text="fizz")]) + output = llm.generate(["foo", "bar", "foo"]) + expected_cache_output = [Generation(text="foo")] + cache_output = langchain.llm_cache.lookup("bar", llm_string) + assert cache_output == expected_cache_output + langchain.llm_cache = None + expected_generations = [ + [Generation(text="fizz")], + [Generation(text="foo")], + [Generation(text="fizz")], + ] + expected_output = LLMResult( + generations=expected_generations, + llm_output=None, + ) + assert output == expected_output diff --git a/tests/unit_tests/llms/test_callbacks.py b/tests/unit_tests/llms/test_callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..d9d52630b7fdcdc464c96946ddf2369645556e2a --- /dev/null +++ b/tests/unit_tests/llms/test_callbacks.py @@ -0,0 +1,30 @@ +"""Test LLM callbacks.""" +from langchain.callbacks.base import CallbackManager +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler +from tests.unit_tests.llms.fake_llm import FakeLLM + + +def test_llm_with_callbacks() -> None: + """Test LLM callbacks.""" + handler = FakeCallbackHandler() + llm = FakeLLM(callback_manager=CallbackManager(handlers=[handler]), verbose=True) + output = llm("foo") + assert output == "foo" + assert handler.starts == 1 + assert handler.ends == 1 + assert handler.errors == 0 + + +def test_llm_with_callbacks_not_verbose() -> None: + """Test LLM callbacks but not verbose.""" + import langchain + + langchain.verbose = False + + handler = FakeCallbackHandler() + llm = FakeLLM(callback_manager=CallbackManager(handlers=[handler])) + output = llm("foo") + assert output == "foo" + assert handler.starts == 0 + assert handler.ends == 0 + assert handler.errors == 0 diff --git a/tests/unit_tests/llms/test_loading.py b/tests/unit_tests/llms/test_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..e478a0b0b3860cec099d714f961528877687495a --- /dev/null +++ b/tests/unit_tests/llms/test_loading.py @@ -0,0 +1,15 @@ +"""Test LLM saving and loading functions.""" +from pathlib import Path +from unittest.mock import patch + +from langchain.llms.loading import load_llm +from tests.unit_tests.llms.fake_llm import FakeLLM + + +@patch("langchain.llms.loading.type_to_cls_dict", {"fake": FakeLLM}) +def test_saving_loading_round_trip(tmp_path: Path) -> None: + """Test saving/loading a Fake LLM.""" + fake_llm = FakeLLM() + fake_llm.save(file_path=tmp_path / "fake_llm.yaml") + loaded_llm = load_llm(tmp_path / "fake_llm.yaml") + assert loaded_llm == fake_llm diff --git a/tests/unit_tests/llms/test_utils.py b/tests/unit_tests/llms/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..77cff607c5df2ace179176be5ebda9bb9e79c06a --- /dev/null +++ b/tests/unit_tests/llms/test_utils.py @@ -0,0 +1,22 @@ +"""Test LLM utility functions.""" +from langchain.llms.utils import enforce_stop_tokens + + +def test_enforce_stop_tokens() -> None: + """Test removing stop tokens when they occur.""" + text = "foo bar baz" + output = enforce_stop_tokens(text, ["moo", 
"baz"]) + assert output == "foo bar " + text = "foo bar baz" + output = enforce_stop_tokens(text, ["moo", "baz", "bar"]) + assert output == "foo " + text = "foo bar baz" + output = enforce_stop_tokens(text, ["moo", "bar"]) + assert output == "foo " + + +def test_enforce_stop_tokens_none() -> None: + """Test removing stop tokens when they do not occur.""" + text = "foo bar baz" + output = enforce_stop_tokens(text, ["moo"]) + assert output == "foo bar baz" diff --git a/tests/unit_tests/output_parsers/__init__.py b/tests/unit_tests/output_parsers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/unit_tests/output_parsers/test_regex_dict.py b/tests/unit_tests/output_parsers/test_regex_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..09df585aedc244170d98e48be95c48e123599f43 --- /dev/null +++ b/tests/unit_tests/output_parsers/test_regex_dict.py @@ -0,0 +1,37 @@ +"""Test in memory docstore.""" +from langchain.output_parsers.regex_dict import RegexDictParser + +DEF_EXPECTED_RESULT = {"action": "Search", "action_input": "How to use this class?"} + +DEF_OUTPUT_KEY_TO_FORMAT = {"action": "Action", "action_input": "Action Input"} + +DEF_README = """We have just received a new result from the LLM, and our next step is +to filter and read its format using regular expressions to identify specific fields, +such as: + +- Action: Search +- Action Input: How to use this class? +- Additional Fields: "N/A" + +To assist us in this task, we use the regex_dict class. This class allows us to send a +dictionary containing an output key and the expected format, which in turn enables us to +retrieve the result of the matching formats and extract specific information from it. + +To exclude irrelevant information from our return dictionary, we can instruct the LLM to +use a specific command that notifies us when it doesn't know the answer. We call this +variable the "no_update_value", and for our current case, we set it to "N/A". Therefore, +we expect the result to only contain the following fields: +{ + {key = action, value = search} + {key = action_input, value = "How to use this class?"}. 
+}""" + + +def test_regex_dict_result() -> None: + """Test regex dict result.""" + regex_dict_parser = RegexDictParser( + output_key_to_format=DEF_OUTPUT_KEY_TO_FORMAT, no_update_value="N/A" + ) + result_dict = regex_dict_parser.parse(DEF_README) + print("parse_result:", result_dict) + assert DEF_EXPECTED_RESULT == result_dict diff --git a/tests/unit_tests/prompts/__init__.py b/tests/unit_tests/prompts/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dc72afe0c4daba3d28c5eb651bccacec3a35d337 --- /dev/null +++ b/tests/unit_tests/prompts/__init__.py @@ -0,0 +1 @@ +"""Test prompt functionality.""" diff --git a/tests/unit_tests/prompts/test_chat.py b/tests/unit_tests/prompts/test_chat.py new file mode 100644 index 0000000000000000000000000000000000000000..6defde6991b41ed22a8c4b13923697614cbbac7b --- /dev/null +++ b/tests/unit_tests/prompts/test_chat.py @@ -0,0 +1,104 @@ +from typing import List + +from langchain.prompts import PromptTemplate +from langchain.prompts.chat import ( + AIMessagePromptTemplate, + BaseMessagePromptTemplate, + ChatMessagePromptTemplate, + ChatPromptTemplate, + ChatPromptValue, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain.schema import HumanMessage + + +def create_messages() -> List[BaseMessagePromptTemplate]: + """Create messages.""" + system_message_prompt = SystemMessagePromptTemplate( + prompt=PromptTemplate( + template="Here's some context: {context}", + input_variables=["context"], + ) + ) + human_message_prompt = HumanMessagePromptTemplate( + prompt=PromptTemplate( + template="Hello {foo}, I'm {bar}. Thanks for the {context}", + input_variables=["foo", "bar", "context"], + ) + ) + ai_message_prompt = AIMessagePromptTemplate( + prompt=PromptTemplate( + template="I'm an AI. I'm {foo}. I'm {bar}.", + input_variables=["foo", "bar"], + ) + ) + chat_message_prompt = ChatMessagePromptTemplate( + role="test", + prompt=PromptTemplate( + template="I'm a generic message. I'm {foo}. I'm {bar}.", + input_variables=["foo", "bar"], + ), + ) + return [ + system_message_prompt, + human_message_prompt, + ai_message_prompt, + chat_message_prompt, + ] + + +def create_chat_prompt_template() -> ChatPromptTemplate: + """Create a chat prompt template.""" + return ChatPromptTemplate( + input_variables=["foo", "bar", "context"], + messages=create_messages(), + ) + + +def test_chat_prompt_template() -> None: + """Test chat prompt template.""" + prompt_template = create_chat_prompt_template() + prompt = prompt_template.format_prompt(foo="foo", bar="bar", context="context") + assert isinstance(prompt, ChatPromptValue) + messages = prompt.to_messages() + assert len(messages) == 4 + assert messages[0].content == "Here's some context: context" + assert messages[1].content == "Hello foo, I'm bar. Thanks for the context" + assert messages[2].content == "I'm an AI. I'm foo. I'm bar." + assert messages[3].content == "I'm a generic message. I'm foo. I'm bar." + + string = prompt.to_string() + expected = ( + "System: Here's some context: context\n" + "Human: Hello foo, I'm bar. Thanks for the context\n" + "AI: I'm an AI. I'm foo. I'm bar.\n" + "test: I'm a generic message. I'm foo. I'm bar." 
+ ) + assert string == expected + + string = prompt_template.format(foo="foo", bar="bar", context="context") + assert string == expected + + +def test_chat_prompt_template_from_messages() -> None: + """Test creating a chat prompt template from messages.""" + chat_prompt_template = ChatPromptTemplate.from_messages(create_messages()) + assert sorted(chat_prompt_template.input_variables) == sorted( + ["context", "foo", "bar"] + ) + assert len(chat_prompt_template.messages) == 4 + + +def test_chat_prompt_template_with_messages() -> None: + messages = create_messages() + [HumanMessage(content="foo")] + chat_prompt_template = ChatPromptTemplate.from_messages(messages) + assert sorted(chat_prompt_template.input_variables) == sorted( + ["context", "foo", "bar"] + ) + assert len(chat_prompt_template.messages) == 5 + prompt_value = chat_prompt_template.format_prompt( + context="see", foo="this", bar="magic" + ) + prompt_value_messages = prompt_value.to_messages() + assert prompt_value_messages[-1] == HumanMessage(content="foo") diff --git a/tests/unit_tests/prompts/test_few_shot.py b/tests/unit_tests/prompts/test_few_shot.py new file mode 100644 index 0000000000000000000000000000000000000000..22ba8e511e8c01a2f51f8d6fde7e96891073b06e --- /dev/null +++ b/tests/unit_tests/prompts/test_few_shot.py @@ -0,0 +1,176 @@ +"""Test few shot prompt template.""" +import pytest + +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate + +EXAMPLE_PROMPT = PromptTemplate( + input_variables=["question", "answer"], template="{question}: {answer}" +) + + +def test_suffix_only() -> None: + """Test prompt works with just a suffix.""" + suffix = "This is a {foo} test." + input_variables = ["foo"] + prompt = FewShotPromptTemplate( + input_variables=input_variables, + suffix=suffix, + examples=[], + example_prompt=EXAMPLE_PROMPT, + ) + output = prompt.format(foo="bar") + expected_output = "This is a bar test." + assert output == expected_output + + +def test_prompt_missing_input_variables() -> None: + """Test error is raised when input variables are not provided.""" + # Test when missing in suffix + template = "This is a {foo} test." + with pytest.raises(ValueError): + FewShotPromptTemplate( + input_variables=[], + suffix=template, + examples=[], + example_prompt=EXAMPLE_PROMPT, + ) + + # Test when missing in prefix + template = "This is a {foo} test." + with pytest.raises(ValueError): + FewShotPromptTemplate( + input_variables=[], + suffix="foo", + examples=[], + prefix=template, + example_prompt=EXAMPLE_PROMPT, + ) + + +def test_prompt_extra_input_variables() -> None: + """Test error is raised when there are too many input variables.""" + template = "This is a {foo} test." + input_variables = ["foo", "bar"] + with pytest.raises(ValueError): + FewShotPromptTemplate( + input_variables=input_variables, + suffix=template, + examples=[], + example_prompt=EXAMPLE_PROMPT, + ) + + +def test_few_shot_functionality() -> None: + """Test that few shot works with examples.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." 
+ examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["content", "new_content"], + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(content="animals", new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output + + +def test_partial_init_string() -> None: + """Test prompt can be initialized with partial variables.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["new_content"], + partial_variables={"content": "animals"}, + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output + + +def test_partial_init_func() -> None: + """Test prompt can be initialized with partial variables.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["new_content"], + partial_variables={"content": lambda: "animals"}, + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output + + +def test_partial() -> None: + """Test prompt can be partialed.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["content", "new_content"], + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + new_prompt = prompt.partial(content="foo") + new_output = new_prompt.format(new_content="party") + expected_output = ( + "This is a test about foo.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert new_output == expected_output + output = prompt.format(new_content="party", content="bar") + expected_output = ( + "This is a test about bar.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." 
+ ) + assert output == expected_output diff --git a/tests/unit_tests/prompts/test_few_shot_with_templates.py b/tests/unit_tests/prompts/test_few_shot_with_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c10d743e1f9cba70e4e677aeb8e63d948188b7 --- /dev/null +++ b/tests/unit_tests/prompts/test_few_shot_with_templates.py @@ -0,0 +1,40 @@ +"""Test few shot prompt template.""" + +from langchain.prompts.few_shot_with_templates import FewShotPromptWithTemplates +from langchain.prompts.prompt import PromptTemplate + +EXAMPLE_PROMPT = PromptTemplate( + input_variables=["question", "answer"], template="{question}: {answer}" +) + + +def test_prompttemplate_prefix_suffix() -> None: + """Test that few shot works when prefix and suffix are PromptTemplates.""" + prefix = PromptTemplate( + input_variables=["content"], template="This is a test about {content}." + ) + suffix = PromptTemplate( + input_variables=["new_content"], + template="Now you try to talk about {new_content}.", + ) + + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptWithTemplates( + suffix=suffix, + prefix=prefix, + input_variables=["content", "new_content"], + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(content="animals", new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output diff --git a/tests/unit_tests/prompts/test_length_based_example_selector.py b/tests/unit_tests/prompts/test_length_based_example_selector.py new file mode 100644 index 0000000000000000000000000000000000000000..38fd689c4e8d5e615ef6c6b34e82e4ee4594ba60 --- /dev/null +++ b/tests/unit_tests/prompts/test_length_based_example_selector.py @@ -0,0 +1,57 @@ +"""Test functionality related to length based selector.""" +import pytest + +from langchain.prompts.example_selector.length_based import LengthBasedExampleSelector +from langchain.prompts.prompt import PromptTemplate + +EXAMPLES = [ + {"question": "Question: who are you?\nAnswer: foo"}, + {"question": "Question: who are you?\nAnswer: foo"}, +] + + +@pytest.fixture +def selector() -> LengthBasedExampleSelector: + """Get length based selector to use in tests.""" + prompts = PromptTemplate(input_variables=["question"], template="{question}") + selector = LengthBasedExampleSelector( + examples=EXAMPLES, + example_prompt=prompts, + max_length=30, + ) + return selector + + +def test_selector_valid(selector: LengthBasedExampleSelector) -> None: + """Test LengthBasedExampleSelector can select examples..""" + short_question = "Short question?" + output = selector.select_examples({"question": short_question}) + assert output == EXAMPLES + + +def test_selector_add_example(selector: LengthBasedExampleSelector) -> None: + """Test LengthBasedExampleSelector can add an example.""" + new_example = {"question": "Question: what are you?\nAnswer: bar"} + selector.add_example(new_example) + short_question = "Short question?" 
+ output = selector.select_examples({"question": short_question}) + assert output == EXAMPLES + [new_example] + + +def test_selector_trims_one_example(selector: LengthBasedExampleSelector) -> None: + """Test LengthBasedExampleSelector can trim one example.""" + long_question = """I am writing a really long question, + this probably is going to affect the example right?""" + output = selector.select_examples({"question": long_question}) + assert output == EXAMPLES[:1] + + +def test_selector_trims_all_examples( + selector: LengthBasedExampleSelector, +) -> None: + """Test LengthBasedExampleSelector can trim all examples.""" + longest_question = """This question is super super super, + super super super super super super super super super super super, + super super super super long, this will affect the example right?""" + output = selector.select_examples({"question": longest_question}) + assert output == [] diff --git a/tests/unit_tests/prompts/test_loading.py b/tests/unit_tests/prompts/test_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..16d60bdec878d6485d93468143fede9146454550 --- /dev/null +++ b/tests/unit_tests/prompts/test_loading.py @@ -0,0 +1,162 @@ +"""Test loading functionality.""" + +import os +from contextlib import contextmanager +from pathlib import Path +from typing import Iterator + +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.loading import load_prompt +from langchain.prompts.prompt import PromptTemplate + + +@contextmanager +def change_directory() -> Iterator: + """Change the working directory to the right folder.""" + origin = Path().absolute() + try: + os.chdir("docs/modules/prompts/examples") + yield + finally: + os.chdir(origin) + + +def test_loading_from_YAML() -> None: + """Test loading from yaml file.""" + with change_directory(): + prompt = load_prompt("simple_prompt.yaml") + expected_prompt = PromptTemplate( + input_variables=["adjective", "content"], + template="Tell me a {adjective} joke about {content}.", + ) + assert prompt == expected_prompt + + +def test_loading_from_JSON() -> None: + """Test loading from json file.""" + with change_directory(): + prompt = load_prompt("simple_prompt.json") + expected_prompt = PromptTemplate( + input_variables=["adjective", "content"], + template="Tell me a {adjective} joke about {content}.", + ) + assert prompt == expected_prompt + + +def test_saving_loading_round_trip(tmp_path: Path) -> None: + """Test equality when saving and loading a prompt.""" + simple_prompt = PromptTemplate( + input_variables=["adjective", "content"], + template="Tell me a {adjective} joke about {content}.", + ) + simple_prompt.save(file_path=tmp_path / "prompt.yaml") + loaded_prompt = load_prompt(tmp_path / "prompt.yaml") + assert loaded_prompt == simple_prompt + + few_shot_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml") + loaded_prompt = load_prompt(tmp_path / "few_shot.yaml") + assert loaded_prompt == few_shot_prompt + + +def test_loading_with_template_as_file() -> None: + """Test loading when the template is a file.""" + with change_directory(): + prompt = 
load_prompt("simple_prompt_with_template_file.json") + expected_prompt = PromptTemplate( + input_variables=["adjective", "content"], + template="Tell me a {adjective} joke about {content}.", + ) + assert prompt == expected_prompt + + +def test_loading_few_shot_prompt_from_yaml() -> None: + """Test loading few shot prompt from yaml.""" + with change_directory(): + prompt = load_prompt("few_shot_prompt.yaml") + expected_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + assert prompt == expected_prompt + + +def test_loading_few_shot_prompt_from_json() -> None: + """Test loading few shot prompt from json.""" + with change_directory(): + prompt = load_prompt("few_shot_prompt.json") + expected_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + assert prompt == expected_prompt + + +def test_loading_few_shot_prompt_when_examples_in_config() -> None: + """Test loading few shot prompt when the examples are in the config.""" + with change_directory(): + prompt = load_prompt("few_shot_prompt_examples_in.json") + expected_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + assert prompt == expected_prompt + + +def test_loading_few_shot_prompt_example_prompt() -> None: + """Test loading few shot when the example prompt is in its own file.""" + with change_directory(): + prompt = load_prompt("few_shot_prompt_example_prompt.json") + expected_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + assert prompt == expected_prompt diff --git a/tests/unit_tests/prompts/test_prompt.py b/tests/unit_tests/prompts/test_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..d7f388743340b8dcf744de15afb328b220e0745b --- /dev/null +++ b/tests/unit_tests/prompts/test_prompt.py @@ -0,0 +1,147 @@ +"""Test functionality related to prompts.""" +import pytest + +from langchain.prompts.prompt import PromptTemplate + + +def test_prompt_valid() -> None: + """Test prompts can be constructed.""" + template = "This is a {foo} test." 
+ input_variables = ["foo"] + prompt = PromptTemplate(input_variables=input_variables, template=template) + assert prompt.template == template + assert prompt.input_variables == input_variables + + +def test_prompt_from_template() -> None: + """Test prompts can be constructed from a template.""" + # Single input variable. + template = "This is a {foo} test." + prompt = PromptTemplate.from_template(template) + expected_prompt = PromptTemplate(template=template, input_variables=["foo"]) + assert prompt == expected_prompt + + # Multiple input variables. + template = "This {bar} is a {foo} test." + prompt = PromptTemplate.from_template(template) + expected_prompt = PromptTemplate(template=template, input_variables=["bar", "foo"]) + assert prompt == expected_prompt + + # Multiple input variables with repeats. + template = "This {bar} is a {foo} test {foo}." + prompt = PromptTemplate.from_template(template) + expected_prompt = PromptTemplate(template=template, input_variables=["bar", "foo"]) + assert prompt == expected_prompt + + +def test_prompt_missing_input_variables() -> None: + """Test error is raised when input variables are not provided.""" + template = "This is a {foo} test." + input_variables: list = [] + with pytest.raises(ValueError): + PromptTemplate(input_variables=input_variables, template=template) + + +def test_prompt_extra_input_variables() -> None: + """Test error is raised when there are too many input variables.""" + template = "This is a {foo} test." + input_variables = ["foo", "bar"] + with pytest.raises(ValueError): + PromptTemplate(input_variables=input_variables, template=template) + + +def test_prompt_wrong_input_variables() -> None: + """Test error is raised when name of input variable is wrong.""" + template = "This is a {foo} test." + input_variables = ["bar"] + with pytest.raises(ValueError): + PromptTemplate(input_variables=input_variables, template=template) + + +def test_prompt_from_examples_valid() -> None: + """Test prompt can be successfully constructed from examples.""" + template = """Test Prompt: + +Question: who are you? +Answer: foo + +Question: what are you? +Answer: bar + +Question: {question} +Answer:""" + input_variables = ["question"] + example_separator = "\n\n" + prefix = """Test Prompt:""" + suffix = """Question: {question}\nAnswer:""" + examples = [ + """Question: who are you?\nAnswer: foo""", + """Question: what are you?\nAnswer: bar""", + ] + prompt_from_examples = PromptTemplate.from_examples( + examples, + suffix, + input_variables, + example_separator=example_separator, + prefix=prefix, + ) + prompt_from_template = PromptTemplate( + input_variables=input_variables, template=template + ) + assert prompt_from_examples.template == prompt_from_template.template + assert prompt_from_examples.input_variables == prompt_from_template.input_variables + + +def test_prompt_invalid_template_format() -> None: + """Test initializing a prompt with invalid template format.""" + template = "This is a {foo} test." 
+ input_variables = ["foo"] + with pytest.raises(ValueError): + PromptTemplate( + input_variables=input_variables, template=template, template_format="bar" + ) + + +def test_prompt_from_file() -> None: + """Test prompt can be successfully constructed from a file.""" + template_file = "tests/unit_tests/data/prompt_file.txt" + input_variables = ["question"] + prompt = PromptTemplate.from_file(template_file, input_variables) + assert prompt.template == "Question: {question}\nAnswer:" + + +def test_partial_init_string() -> None: + """Test prompt can be initialized with partial variables.""" + template = "This is a {foo} test." + prompt = PromptTemplate( + input_variables=[], template=template, partial_variables={"foo": 1} + ) + assert prompt.template == template + assert prompt.input_variables == [] + result = prompt.format() + assert result == "This is a 1 test." + + +def test_partial_init_func() -> None: + """Test prompt can be initialized with partial variables.""" + template = "This is a {foo} test." + prompt = PromptTemplate( + input_variables=[], template=template, partial_variables={"foo": lambda: 2} + ) + assert prompt.template == template + assert prompt.input_variables == [] + result = prompt.format() + assert result == "This is a 2 test." + + +def test_partial() -> None: + """Test prompt can be partialed.""" + template = "This is a {foo} test." + prompt = PromptTemplate(input_variables=["foo"], template=template) + assert prompt.template == template + assert prompt.input_variables == ["foo"] + new_prompt = prompt.partial(foo="3") + new_result = new_prompt.format() + assert new_result == "This is a 3 test." + result = prompt.format(foo="foo") + assert result == "This is a foo test." diff --git a/tests/unit_tests/prompts/test_utils.py b/tests/unit_tests/prompts/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..479d02e8bd97bf790057a1da909e0e8868f1f4d4 --- /dev/null +++ b/tests/unit_tests/prompts/test_utils.py @@ -0,0 +1,9 @@ +"""Test functionality related to prompt utils.""" +from langchain.prompts.example_selector.semantic_similarity import sorted_values + + +def test_sorted_vals() -> None: + """Test sorted values from dictionary.""" + test_dict = {"key2": "val2", "key1": "val1"} + expected_response = ["val1", "val2"] + assert sorted_values(test_dict) == expected_response diff --git a/tests/unit_tests/test_bash.py b/tests/unit_tests/test_bash.py new file mode 100644 index 0000000000000000000000000000000000000000..aa5a30f1b44a33f4748b48ac5bcd19915552e6db --- /dev/null +++ b/tests/unit_tests/test_bash.py @@ -0,0 +1,53 @@ +"""Test the bash utility.""" +import re +import subprocess +from pathlib import Path + +from langchain.utilities.bash import BashProcess + + +def test_pwd_command() -> None: + """Test correct functionality.""" + session = BashProcess() + commands = ["pwd"] + output = session.run(commands) + + assert output == subprocess.check_output("pwd", shell=True).decode() + + +def test_incorrect_command() -> None: + """Test handling of incorrect command.""" + session = BashProcess() + output = session.run(["invalid_command"]) + assert output == "Command 'invalid_command' returned non-zero exit status 127." 
+ + +def test_incorrect_command_return_err_output() -> None: + """Test optional returning of shell output on incorrect command.""" + session = BashProcess(return_err_output=True) + output = session.run(["invalid_command"]) + assert re.match(r"^/bin/sh:.*invalid_command.*not found.*$", output) + + +def test_create_directory_and_files(tmp_path: Path) -> None: + """Test creation of a directory and files in a temporary directory.""" + session = BashProcess(strip_newlines=True) + + # create a subdirectory in the temporary directory + temp_dir = tmp_path / "test_dir" + temp_dir.mkdir() + + # run the commands in the temporary directory + commands = [ + f"touch {temp_dir}/file1.txt", + f"touch {temp_dir}/file2.txt", + f"echo 'hello world' > {temp_dir}/file2.txt", + f"cat {temp_dir}/file2.txt", + ] + + output = session.run(commands) + assert output == "hello world" + + # check that the files were created in the temporary directory + output = session.run([f"ls {temp_dir}"]) + assert output == "file1.txt\nfile2.txt" diff --git a/tests/unit_tests/test_formatting.py b/tests/unit_tests/test_formatting.py new file mode 100644 index 0000000000000000000000000000000000000000..168e580b7b9f2748515fa353feb96e29bc0309a4 --- /dev/null +++ b/tests/unit_tests/test_formatting.py @@ -0,0 +1,26 @@ +"""Test formatting functionality.""" +import pytest + +from langchain.formatting import formatter + + +def test_valid_formatting() -> None: + """Test formatting works as expected.""" + template = "This is a {foo} test." + output = formatter.format(template, foo="good") + expected_output = "This is a good test." + assert output == expected_output + + +def test_does_not_allow_args() -> None: + """Test formatting raises error when args are provided.""" + template = "This is a {} test." + with pytest.raises(ValueError): + formatter.format(template, "good") + + +def test_does_not_allow_extra_kwargs() -> None: + """Test formatting does not allow extra key word arguments.""" + template = "This is a {foo} test." + with pytest.raises(KeyError): + formatter.format(template, foo="good", bar="oops") diff --git a/tests/unit_tests/test_python.py b/tests/unit_tests/test_python.py new file mode 100644 index 0000000000000000000000000000000000000000..d1eb3ae921ef71530eff1b0ed46811a1cf3a7c93 --- /dev/null +++ b/tests/unit_tests/test_python.py @@ -0,0 +1,55 @@ +"""Test functionality of Python REPL.""" + +from langchain.python import PythonREPL + + +def test_python_repl() -> None: + """Test functionality when globals/locals are not provided.""" + repl = PythonREPL() + + # Run a simple initial command. + repl.run("foo = 1") + assert repl.locals is not None + assert repl.locals["foo"] == 1 + + # Now run a command that accesses `foo` to make sure it still has it. 
+ repl.run("bar = foo * 2") + assert repl.locals is not None + assert repl.locals["bar"] == 2 + + +def test_python_repl_no_previous_variables() -> None: + """Test that it does not have access to variables created outside the scope.""" + foo = 3 # noqa: F841 + repl = PythonREPL() + output = repl.run("print(foo)") + assert output == "name 'foo' is not defined" + + +def test_python_repl_pass_in_locals() -> None: + """Test functionality when passing in locals.""" + _locals = {"foo": 4} + repl = PythonREPL(_locals=_locals) + repl.run("bar = foo * 2") + assert repl.locals is not None + assert repl.locals["bar"] == 8 + + +def test_functionality() -> None: + """Test correct functionality.""" + chain = PythonREPL() + code = "print(1 + 1)" + output = chain.run(code) + assert output == "2\n" + + +def test_function() -> None: + """Test correct functionality.""" + chain = PythonREPL() + code = "def add(a, b): " " return a + b" + output = chain.run(code) + assert output == "" + + code = "print(add(1, 2))" + output = chain.run(code) + assert output == "3\n" diff --git a/tests/unit_tests/test_sql_database.py b/tests/unit_tests/test_sql_database.py new file mode 100644 index 0000000000000000000000000000000000000000..3da40c5a5acdc754846d12c7eeec5ed6fd2fc513 --- /dev/null +++ b/tests/unit_tests/test_sql_database.py @@ -0,0 +1,125 @@ +# flake8: noqa=E501 +"""Test SQL database wrapper.""" + +from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert + +from langchain.sql_database import SQLDatabase + +metadata_obj = MetaData() + +user = Table( + "user", + metadata_obj, + Column("user_id", Integer, primary_key=True), + Column("user_name", String(16), nullable=False), +) + +company = Table( + "company", + metadata_obj, + Column("company_id", Integer, primary_key=True), + Column("company_location", String, nullable=False), +) + + +def test_table_info() -> None: + """Test that table info is constructed properly.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + db = SQLDatabase(engine) + output = db.table_info + expected_output = """ + CREATE TABLE user ( + user_id INTEGER NOT NULL, + user_name VARCHAR(16) NOT NULL, + PRIMARY KEY (user_id) + ) + /* + 3 rows from user table: + user_id user_name + /* + + + CREATE TABLE company ( + company_id INTEGER NOT NULL, + company_location VARCHAR NOT NULL, + PRIMARY KEY (company_id) + ) + /* + 3 rows from company table: + company_id company_location + */ + """ + + assert sorted(" ".join(output.split())) == sorted(" ".join(expected_output.split())) + + +def test_table_info_w_sample_rows() -> None: + """Test that table info is constructed properly.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + values = [ + {"user_id": 13, "user_name": "Harrison"}, + {"user_id": 14, "user_name": "Chase"}, + ] + stmt = insert(user).values(values) + with engine.begin() as conn: + conn.execute(stmt) + + db = SQLDatabase(engine, sample_rows_in_table_info=2) + + output = db.table_info + + expected_output = """ + CREATE TABLE company ( + company_id INTEGER NOT NULL, + company_location VARCHAR NOT NULL, + PRIMARY KEY (company_id) +) + /* + 2 rows from company table: + company_id company_location + */ + + CREATE TABLE user ( + user_id INTEGER NOT NULL, + user_name VARCHAR(16) NOT NULL, + PRIMARY KEY (user_id) + ) + /* + 2 rows from user table: + user_id user_name + 13 Harrison + 14 Chase + */ + """ + + assert sorted(output.split()) == sorted(expected_output.split()) + + +def test_sql_database_run() -> 
None: + """Test that commands can be run successfully and returned in correct format.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison") + with engine.begin() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + command = "select user_name from user where user_id = 13" + output = db.run(command) + expected_output = "[('Harrison',)]" + assert output == expected_output + + +def test_sql_database_run_update() -> None: + """Test commands which return no rows return an empty string.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison") + with engine.begin() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + command = "update user set user_name='Updated' where user_id = 13" + output = db.run(command) + expected_output = "" + assert output == expected_output diff --git a/tests/unit_tests/test_sql_database_schema.py b/tests/unit_tests/test_sql_database_schema.py new file mode 100644 index 0000000000000000000000000000000000000000..58a0ea37b4e1e40e2238dd5f9248bf63fa4823c8 --- /dev/null +++ b/tests/unit_tests/test_sql_database_schema.py @@ -0,0 +1,77 @@ +# flake8: noqa +"""Test SQL database wrapper with schema support. + +Using DuckDB as SQLite does not support schemas. +""" + +from sqlalchemy import ( + Column, + Integer, + MetaData, + Sequence, + String, + Table, + create_engine, + event, + insert, + schema, +) + +from langchain.sql_database import SQLDatabase + +metadata_obj = MetaData() + +event.listen(metadata_obj, "before_create", schema.CreateSchema("schema_a")) +event.listen(metadata_obj, "before_create", schema.CreateSchema("schema_b")) + +user = Table( + "user", + metadata_obj, + Column("user_id", Integer, Sequence("user_id_seq"), primary_key=True), + Column("user_name", String, nullable=False), + schema="schema_a", +) + +company = Table( + "company", + metadata_obj, + Column("company_id", Integer, Sequence("company_id_seq"), primary_key=True), + Column("company_location", String, nullable=False), + schema="schema_b", +) + + +def test_table_info() -> None: + """Test that table info is constructed properly.""" + engine = create_engine("duckdb:///:memory:") + metadata_obj.create_all(engine) + + db = SQLDatabase(engine, schema="schema_a", metadata=metadata_obj) + output = db.table_info + expected_output = """ + CREATE TABLE schema_a."user" ( + user_id INTEGER NOT NULL, + user_name VARCHAR NOT NULL, + PRIMARY KEY (user_id) + ) + /* + 3 rows from user table: + user_id user_name + */ + """ + + assert sorted(" ".join(output.split())) == sorted(" ".join(expected_output.split())) + + +def test_sql_database_run() -> None: + """Test that commands can be run successfully and returned in correct format.""" + engine = create_engine("duckdb:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison") + with engine.begin() as conn: + conn.execute(stmt) + db = SQLDatabase(engine, schema="schema_a") + command = 'select user_name from "user" where user_id = 13' + output = db.run(command) + expected_output = "[('Harrison',)]" + assert output == expected_output diff --git a/tests/unit_tests/test_text_splitter.py b/tests/unit_tests/test_text_splitter.py new file mode 100644 index 0000000000000000000000000000000000000000..dbfb9b5fe2b12169685504622007c3c03af26c79 --- /dev/null +++ b/tests/unit_tests/test_text_splitter.py @@ -0,0 +1,139 @@ +"""Test text splitting 
functionality.""" +import pytest + +from langchain.docstore.document import Document +from langchain.text_splitter import ( + CharacterTextSplitter, + RecursiveCharacterTextSplitter, +) + + +def test_character_text_splitter() -> None: + """Test splitting by character count.""" + text = "foo bar baz 123" + splitter = CharacterTextSplitter(separator=" ", chunk_size=7, chunk_overlap=3) + output = splitter.split_text(text) + expected_output = ["foo bar", "bar baz", "baz 123"] + assert output == expected_output + + +def test_character_text_splitter_empty_doc() -> None: + """Test splitting by character count doesn't create empty documents.""" + text = "foo bar" + splitter = CharacterTextSplitter(separator=" ", chunk_size=2, chunk_overlap=0) + output = splitter.split_text(text) + expected_output = ["foo", "bar"] + assert output == expected_output + + +def test_character_text_splitter_separtor_empty_doc() -> None: + """Test edge cases are separators.""" + text = "f b" + splitter = CharacterTextSplitter(separator=" ", chunk_size=2, chunk_overlap=0) + output = splitter.split_text(text) + expected_output = ["f", "b"] + assert output == expected_output + + +def test_character_text_splitter_long() -> None: + """Test splitting by character count on long words.""" + text = "foo bar baz a a" + splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=1) + output = splitter.split_text(text) + expected_output = ["foo", "bar", "baz", "a a"] + assert output == expected_output + + +def test_character_text_splitter_short_words_first() -> None: + """Test splitting by character count when shorter words are first.""" + text = "a a foo bar baz" + splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=1) + output = splitter.split_text(text) + expected_output = ["a a", "foo", "bar", "baz"] + assert output == expected_output + + +def test_character_text_splitter_longer_words() -> None: + """Test splitting by characters when splits not found easily.""" + text = "foo bar baz 123" + splitter = CharacterTextSplitter(separator=" ", chunk_size=1, chunk_overlap=1) + output = splitter.split_text(text) + expected_output = ["foo", "bar", "baz", "123"] + assert output == expected_output + + +def test_character_text_splitting_args() -> None: + """Test invalid arguments.""" + with pytest.raises(ValueError): + CharacterTextSplitter(chunk_size=2, chunk_overlap=4) + + +def test_create_documents() -> None: + """Test create documents method.""" + texts = ["foo bar", "baz"] + splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0) + docs = splitter.create_documents(texts) + expected_docs = [ + Document(page_content="foo"), + Document(page_content="bar"), + Document(page_content="baz"), + ] + assert docs == expected_docs + + +def test_create_documents_with_metadata() -> None: + """Test create documents with metadata method.""" + texts = ["foo bar", "baz"] + splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0) + docs = splitter.create_documents(texts, [{"source": "1"}, {"source": "2"}]) + expected_docs = [ + Document(page_content="foo", metadata={"source": "1"}), + Document(page_content="bar", metadata={"source": "1"}), + Document(page_content="baz", metadata={"source": "2"}), + ] + assert docs == expected_docs + + +def test_metadata_not_shallow() -> None: + """Test that metadatas are not shallow.""" + texts = ["foo bar"] + splitter = CharacterTextSplitter(separator=" ", chunk_size=3, chunk_overlap=0) + docs = splitter.create_documents(texts, 
[{"source": "1"}]) + expected_docs = [ + Document(page_content="foo", metadata={"source": "1"}), + Document(page_content="bar", metadata={"source": "1"}), + ] + assert docs == expected_docs + docs[0].metadata["foo"] = 1 + assert docs[0].metadata == {"source": "1", "foo": 1} + assert docs[1].metadata == {"source": "1"} + + +def test_iterative_text_splitter() -> None: + """Test iterative text splitter.""" + text = """Hi.\n\nI'm Harrison.\n\nHow? Are? You?\nOkay then f f f f. +This is a weird text to write, but gotta test the splittingggg some how. + +Bye!\n\n-H.""" + splitter = RecursiveCharacterTextSplitter(chunk_size=10, chunk_overlap=1) + output = splitter.split_text(text) + expected_output = [ + "Hi.", + "I'm", + "Harrison.", + "How? Are?", + "You?", + "Okay then", + "f f f f.", + "This is a", + "a weird", + "text to", + "write, but", + "gotta test", + "the", + "splittingg", + "ggg", + "some how.", + "Bye!\n\n-H.", + ] + assert output == expected_output diff --git a/tests/unit_tests/tools/__init__.py b/tests/unit_tests/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c5216691d9228c372458e113e84c928a392650b --- /dev/null +++ b/tests/unit_tests/tools/__init__.py @@ -0,0 +1 @@ +"""Test suite for the tools module.""" diff --git a/tests/unit_tests/tools/test_json.py b/tests/unit_tests/tools/test_json.py new file mode 100644 index 0000000000000000000000000000000000000000..36a96595e03d36f655299e0ac5914b8fbc625210 --- /dev/null +++ b/tests/unit_tests/tools/test_json.py @@ -0,0 +1,49 @@ +"""Test functionality of JSON tools.""" +from pathlib import Path + +from langchain.tools.json.tool import JsonSpec + + +def test_json_spec_from_file(tmp_path: Path) -> None: + """Test JsonSpec can be constructed from a file.""" + path = tmp_path / "test.json" + path.write_text('{"foo": "bar"}') + spec = JsonSpec.from_file(path) + assert spec.dict_ == {"foo": "bar"} + + +def test_json_spec_keys() -> None: + """Test JsonSpec can return keys of a dict at given path.""" + spec = JsonSpec(dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}) + assert spec.keys("data") == "['foo', 'baz']" + assert "ValueError" in spec.keys('data["foo"]') + assert spec.keys('data["baz"]') == "['test']" + assert spec.keys('data["baz"]["test"]') == "['foo']" + assert "ValueError" in spec.keys('data["baz"]["test"]["foo"]') + + +def test_json_spec_value() -> None: + """Test JsonSpec can return value of a dict at given path.""" + spec = JsonSpec(dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}) + assert spec.value("data") == "{'foo': 'bar', 'baz': {'test': {'foo': [1, 2, 3]}}}" + assert spec.value('data["foo"]') == "bar" + assert spec.value('data["baz"]') == "{'test': {'foo': [1, 2, 3]}}" + assert spec.value('data["baz"]["test"]') == "{'foo': [1, 2, 3]}" + assert spec.value('data["baz"]["test"]["foo"]') == "[1, 2, 3]" + + +def test_json_spec_value_max_length() -> None: + """Test JsonSpec can return value of a dict at given path.""" + spec = JsonSpec( + dict_={"foo": "bar", "baz": {"test": {"foo": [1, 2, 3]}}}, max_value_length=5 + ) + assert spec.value('data["foo"]') == "bar" + assert ( + spec.value('data["baz"]') + == "Value is a large dictionary, should explore its keys directly" + ) + assert ( + spec.value('data["baz"]["test"]') + == "Value is a large dictionary, should explore its keys directly" + ) + assert spec.value('data["baz"]["test"]["foo"]') == "[1, 2..." 
diff --git a/tests/unit_tests/utilities/__init__.py b/tests/unit_tests/utilities/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c3bc6c0518fbf89da2e1bb89c716778fb4eebed2 --- /dev/null +++ b/tests/unit_tests/utilities/__init__.py @@ -0,0 +1 @@ +"""Tests utilities module.""" diff --git a/tests/unit_tests/utilities/test_loading.py b/tests/unit_tests/utilities/test_loading.py new file mode 100644 index 0000000000000000000000000000000000000000..380297f740c593ce19f443eb8fb4f08542988be3 --- /dev/null +++ b/tests/unit_tests/utilities/test_loading.py @@ -0,0 +1,93 @@ +"""Test the functionality of loading from langchain-hub.""" + +import json +import re +from pathlib import Path +from typing import Iterable +from unittest.mock import Mock +from urllib.parse import urljoin + +import pytest +import responses + +from langchain.utilities.loading import DEFAULT_REF, URL_BASE, try_load_from_hub + + +@pytest.fixture(autouse=True) +def mocked_responses() -> Iterable[responses.RequestsMock]: + """Fixture mocking requests.get.""" + with responses.RequestsMock() as rsps: + yield rsps + + +def test_non_hub_path() -> None: + """Test that a non-hub path returns None.""" + path = "chains/some_path" + loader = Mock() + valid_suffixes = {"suffix"} + result = try_load_from_hub(path, loader, "chains", valid_suffixes) + + assert result is None + loader.assert_not_called() + + +def test_invalid_prefix() -> None: + """Test that a hub path with an invalid prefix returns None.""" + path = "lc://agents/some_path" + loader = Mock() + valid_suffixes = {"suffix"} + result = try_load_from_hub(path, loader, "chains", valid_suffixes) + + assert result is None + loader.assert_not_called() + + +def test_invalid_suffix() -> None: + """Test that a hub path with an invalid suffix raises an error.""" + path = "lc://chains/path.invalid" + loader = Mock() + valid_suffixes = {"json"} + + with pytest.raises(ValueError, match="Unsupported file type."): + try_load_from_hub(path, loader, "chains", valid_suffixes) + + loader.assert_not_called() + + +@pytest.mark.parametrize("ref", [None, "v0.3"]) +def test_success(mocked_responses: responses.RequestsMock, ref: str) -> None: + """Test that a valid hub path is loaded correctly with and without a ref.""" + path = "chains/path/chain.json" + lc_path_prefix = f"lc{('@' + ref) if ref else ''}://" + valid_suffixes = {"json"} + body = json.dumps({"foo": "bar"}) + ref = ref or DEFAULT_REF + + file_contents = None + + def loader(file_path: str) -> None: + nonlocal file_contents + assert file_contents is None + file_contents = Path(file_path).read_text() + + mocked_responses.get( + urljoin(URL_BASE.format(ref=ref), path), + body=body, + status=200, + content_type="application/json", + ) + + try_load_from_hub(f"{lc_path_prefix}{path}", loader, "chains", valid_suffixes) + assert file_contents == body + + +def test_failed_request(mocked_responses: responses.RequestsMock) -> None: + """Test that a failed request raises an error.""" + path = "chains/path/chain.json" + loader = Mock() + + mocked_responses.get(urljoin(URL_BASE.format(ref=DEFAULT_REF), path), status=500) + + with pytest.raises(ValueError, match=re.compile("Could not find file at .*")): + try_load_from_hub(f"lc://{path}", loader, "chains", {"json"}) + loader.assert_not_called()
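Taken together, the hub-loading tests pin down the contract of `try_load_from_hub`: a non-hub path returns None, an `lc://` (optionally `lc@<ref>://`) path must match the expected prefix and carry an allowed suffix, and on success the file is fetched from langchain-hub and handed to the supplied loader callback. Below is a minimal sketch of how a caller might wrap that contract; the helper names, the local-file fallback, and the assumption that the hub path returns the loader's result are illustrative, not part of the diff.

```python
# Illustrative caller sketch; _load_chain_file and the local-path fallback are assumptions.
import json
from pathlib import Path
from typing import Any, Union

from langchain.utilities.loading import try_load_from_hub


def _load_chain_file(file: Union[str, Path]) -> Any:
    # Hypothetical loader callback: try_load_from_hub passes the downloaded file's path here.
    return json.loads(Path(file).read_text())


def load_chain(path: str) -> Any:
    # Handles "lc://chains/..." (and "lc@<ref>://...") paths via the hub first; assumes the
    # hub helper returns whatever the loader returns, and None for non-hub paths.
    hub_result = try_load_from_hub(path, _load_chain_file, "chains", {"json"})
    if hub_result is not None:
        return hub_result
    # Anything else is treated as a plain local file path.
    return _load_chain_file(path)
```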