Spaces:
Runtime error
Runtime error
💾 Adding a vectorstore (first implementation)
Browse files- .gitignore +3 -1
- example.ipynb +93 -21
- megabots/__init__.py +21 -14
- megabots/vectorstores.py +33 -0
.gitignore
CHANGED
@@ -6,4 +6,6 @@ dist
|
|
6 |
build
|
7 |
**.pickle
|
8 |
**.pkl
|
9 |
-
.env
|
|
|
|
|
|
6 |
build
|
7 |
**.pickle
|
8 |
**.pkl
|
9 |
+
.env
|
10 |
+
volumes
|
11 |
+
docker-compose.yml
|
example.ipynb
CHANGED
@@ -1,31 +1,40 @@
|
|
1 |
{
|
2 |
"cells": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
8 |
-
{
|
9 |
-
"data": {
|
10 |
-
"text/plain": [
|
11 |
-
"True"
|
12 |
-
]
|
13 |
-
},
|
14 |
-
"execution_count": 8,
|
15 |
-
"metadata": {},
|
16 |
-
"output_type": "execute_result"
|
17 |
-
}
|
18 |
-
],
|
19 |
"source": [
|
20 |
"from megabots import bot\n",
|
21 |
-
"from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
"\n",
|
23 |
-
"
|
24 |
]
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "code",
|
28 |
-
"execution_count":
|
29 |
"metadata": {},
|
30 |
"outputs": [
|
31 |
{
|
@@ -33,7 +42,7 @@
|
|
33 |
"output_type": "stream",
|
34 |
"text": [
|
35 |
"Using model: gpt-3.5-turbo\n",
|
36 |
-
"Loading path from
|
37 |
]
|
38 |
},
|
39 |
{
|
@@ -42,7 +51,7 @@
|
|
42 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
43 |
]
|
44 |
},
|
45 |
-
"execution_count":
|
46 |
"metadata": {},
|
47 |
"output_type": "execute_result"
|
48 |
}
|
@@ -52,9 +61,19 @@
|
|
52 |
"qnabot.ask(\"what was the first roster of the avengers?\")"
|
53 |
]
|
54 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
{
|
56 |
"cell_type": "code",
|
57 |
-
"execution_count":
|
58 |
"metadata": {},
|
59 |
"outputs": [
|
60 |
{
|
@@ -62,7 +81,7 @@
|
|
62 |
"output_type": "stream",
|
63 |
"text": [
|
64 |
"Using model: gpt-3.5-turbo\n",
|
65 |
-
"Loading path from
|
66 |
]
|
67 |
},
|
68 |
{
|
@@ -71,7 +90,7 @@
|
|
71 |
"\"Hmmm! Let me think about that... Ah yes, the original Avengers lineup included Iron Man, Thor, Hulk, Ant-Man, and the Wasp. They were like the ultimate superhero squad, except for maybe the Teenage Mutant Ninja Turtles. But let's be real, they were just a bunch of turtles who liked pizza.\""
|
72 |
]
|
73 |
},
|
74 |
-
"execution_count":
|
75 |
"metadata": {},
|
76 |
"output_type": "execute_result"
|
77 |
}
|
@@ -97,6 +116,59 @@
|
|
97 |
")\n",
|
98 |
"qnabot.ask(\"what was the first roster of the avengers?\")\n"
|
99 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
}
|
101 |
],
|
102 |
"metadata": {
|
|
|
1 |
{
|
2 |
"cells": [
|
3 |
+
{
|
4 |
+
"attachments": {},
|
5 |
+
"cell_type": "markdown",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Examples\n",
|
9 |
+
"\n",
|
10 |
+
"Below you can find some examples of how to use the 🤖 `Megabots` library."
|
11 |
+
]
|
12 |
+
},
|
13 |
{
|
14 |
"cell_type": "code",
|
15 |
+
"execution_count": 13,
|
16 |
"metadata": {},
|
17 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"source": [
|
19 |
"from megabots import bot\n",
|
20 |
+
"from dotenv import load_dotenv"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"attachments": {},
|
25 |
+
"cell_type": "markdown",
|
26 |
+
"metadata": {},
|
27 |
+
"source": [
|
28 |
+
"### Creating a bot\n",
|
29 |
+
"\n",
|
30 |
+
"The `bot` object is the main object of the library. It is used to create a bot and to interact with it.\n",
|
31 |
"\n",
|
32 |
+
"The `index` argument specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of a directory, the index will be created automatically. If no index is specified, `bot` will look for `index.pkl` or `./index`."
|
33 |
]
|
34 |
},
|
35 |
{
|
36 |
"cell_type": "code",
|
37 |
+
"execution_count": 14,
|
38 |
"metadata": {},
|
39 |
"outputs": [
|
40 |
{
|
|
|
42 |
"output_type": "stream",
|
43 |
"text": [
|
44 |
"Using model: gpt-3.5-turbo\n",
|
45 |
+
"Loading path from pickle file: ./index.pkl ...\n"
|
46 |
]
|
47 |
},
|
48 |
{
|
|
|
51 |
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
52 |
]
|
53 |
},
|
54 |
+
"execution_count": 14,
|
55 |
"metadata": {},
|
56 |
"output_type": "execute_result"
|
57 |
}
|
|
|
61 |
"qnabot.ask(\"what was the first roster of the avengers?\")"
|
62 |
]
|
63 |
},
|
64 |
+
{
|
65 |
+
"attachments": {},
|
66 |
+
"cell_type": "markdown",
|
67 |
+
"metadata": {},
|
68 |
+
"source": [
|
69 |
+
"### Changing the bot's prompt\n",
|
70 |
+
"\n",
|
71 |
+
"You can change the bot's prompt to customize it to your needs."
|
72 |
+
]
|
73 |
+
},
|
74 |
{
|
75 |
"cell_type": "code",
|
76 |
+
"execution_count": 15,
|
77 |
"metadata": {},
|
78 |
"outputs": [
|
79 |
{
|
|
|
81 |
"output_type": "stream",
|
82 |
"text": [
|
83 |
"Using model: gpt-3.5-turbo\n",
|
84 |
+
"Loading path from pickle file: ./index.pkl ...\n"
|
85 |
]
|
86 |
},
|
87 |
{
|
|
|
90 |
"\"Hmmm! Let me think about that... Ah yes, the original Avengers lineup included Iron Man, Thor, Hulk, Ant-Man, and the Wasp. They were like the ultimate superhero squad, except for maybe the Teenage Mutant Ninja Turtles. But let's be real, they were just a bunch of turtles who liked pizza.\""
|
91 |
]
|
92 |
},
|
93 |
+
"execution_count": 15,
|
94 |
"metadata": {},
|
95 |
"output_type": "execute_result"
|
96 |
}
|
|
|
116 |
")\n",
|
117 |
"qnabot.ask(\"what was the first roster of the avengers?\")\n"
|
118 |
]
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"attachments": {},
|
122 |
+
"cell_type": "markdown",
|
123 |
+
"metadata": {},
|
124 |
+
"source": [
|
125 |
+
"### Using Megabots with Milvus\n",
|
126 |
+
"\n",
|
127 |
+
"Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below.\n",
|
128 |
+
"\n",
|
129 |
+
"In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.\n",
|
130 |
+
"The command is:\n",
|
131 |
+
" \n",
|
132 |
+
"```bash\n",
|
133 |
+
"wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py\n",
|
134 |
+
"```\n",
|
135 |
+
"You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus"
|
136 |
+
]
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"cell_type": "code",
|
140 |
+
"execution_count": 11,
|
141 |
+
"metadata": {},
|
142 |
+
"outputs": [
|
143 |
+
{
|
144 |
+
"name": "stdout",
|
145 |
+
"output_type": "stream",
|
146 |
+
"text": [
|
147 |
+
"Using model: gpt-3.5-turbo\n"
|
148 |
+
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"data": {
|
152 |
+
"text/plain": [
|
153 |
+
"'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'"
|
154 |
+
]
|
155 |
+
},
|
156 |
+
"execution_count": 11,
|
157 |
+
"metadata": {},
|
158 |
+
"output_type": "execute_result"
|
159 |
+
}
|
160 |
+
],
|
161 |
+
"source": [
|
162 |
+
"from megabots import bot, vectorstore\n",
|
163 |
+
"\n",
|
164 |
+
"# Create a vectorstore object. Default port is 19530 and default host is localhost\n",
|
165 |
+
"milvus = vectorstore(\"milvus\")\n",
|
166 |
+
"\n",
|
167 |
+
"# Point it to your files directory so that it can index the files and add them to the vectorstore\n",
|
168 |
+
"bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=milvus)\n",
|
169 |
+
"\n",
|
170 |
+
"bot.ask(\"what was the first roster of the avengers?\")\n"
|
171 |
+
]
|
172 |
}
|
173 |
],
|
174 |
"metadata": {
|
megabots/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
|
|
1 |
from langchain.llms import OpenAI
|
2 |
from langchain.chat_models import ChatOpenAI
|
3 |
from langchain.embeddings import OpenAIEmbeddings
|
4 |
-
from langchain.document_loaders import DirectoryLoader, S3DirectoryLoader
|
5 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
6 |
from langchain.vectorstores.faiss import FAISS
|
7 |
import gradio as gr
|
@@ -11,10 +11,9 @@ import os
|
|
11 |
from dotenv import load_dotenv
|
12 |
from langchain.prompts import PromptTemplate
|
13 |
from langchain.chains.question_answering import load_qa_chain
|
14 |
-
from langchain.chains.conversational_retrieval.prompts import
|
15 |
-
|
16 |
-
|
17 |
-
)
|
18 |
|
19 |
load_dotenv()
|
20 |
|
@@ -25,15 +24,17 @@ class Bot:
|
|
25 |
model: str | None = None,
|
26 |
prompt_template: str | None = None,
|
27 |
prompt_variables: list[str] | None = None,
|
28 |
-
memory: str | None = None,
|
29 |
index: str | None = None,
|
30 |
sources: bool | None = False,
|
|
|
|
|
|
|
31 |
verbose: bool = False,
|
32 |
temperature: int = 0,
|
33 |
):
|
34 |
self.select_model(model, temperature)
|
35 |
self.create_loader(index)
|
36 |
-
self.load_or_create_index(index)
|
37 |
|
38 |
# Load the question-answering chain for the selected model
|
39 |
self.chain = self.create_chain(
|
@@ -83,18 +84,25 @@ class Bot:
|
|
83 |
)
|
84 |
self.loader = DirectoryLoader(index, recursive=True)
|
85 |
|
86 |
-
def load_or_create_index(self,
|
87 |
# Load an existing index from disk or create a new one if not available
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
# Is pickle
|
90 |
-
if
|
91 |
-
print("Loading path from
|
92 |
-
with open(
|
93 |
self.search_index = pickle.load(f)
|
94 |
return
|
95 |
|
96 |
# Is directory
|
97 |
-
if
|
98 |
print("Creating index...")
|
99 |
self.search_index = FAISS.from_documents(
|
100 |
self.loader.load_and_split(), OpenAIEmbeddings()
|
@@ -125,9 +133,8 @@ SUPPORTED_TASKS = {
|
|
125 |
"impl": Bot,
|
126 |
"default": {
|
127 |
"model": "gpt-3.5-turbo",
|
128 |
-
"prompt": "",
|
129 |
"temperature": 0,
|
130 |
-
"index": "./
|
131 |
},
|
132 |
}
|
133 |
}
|
|
|
1 |
+
from typing import Any
|
2 |
from langchain.llms import OpenAI
|
3 |
from langchain.chat_models import ChatOpenAI
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
|
|
5 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
6 |
from langchain.vectorstores.faiss import FAISS
|
7 |
import gradio as gr
|
|
|
11 |
from dotenv import load_dotenv
|
12 |
from langchain.prompts import PromptTemplate
|
13 |
from langchain.chains.question_answering import load_qa_chain
|
14 |
+
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
|
15 |
+
from langchain.document_loaders import DirectoryLoader
|
16 |
+
from megabots.vectorstores import vectorstore
|
|
|
17 |
|
18 |
load_dotenv()
|
19 |
|
|
|
24 |
model: str | None = None,
|
25 |
prompt_template: str | None = None,
|
26 |
prompt_variables: list[str] | None = None,
|
|
|
27 |
index: str | None = None,
|
28 |
sources: bool | None = False,
|
29 |
+
# TODO: Fix this typing
|
30 |
+
vectorstore: Any | None = None,
|
31 |
+
memory: str | None = None,
|
32 |
verbose: bool = False,
|
33 |
temperature: int = 0,
|
34 |
):
|
35 |
self.select_model(model, temperature)
|
36 |
self.create_loader(index)
|
37 |
+
self.load_or_create_index(index, vectorstore)
|
38 |
|
39 |
# Load the question-answering chain for the selected model
|
40 |
self.chain = self.create_chain(
|
|
|
84 |
)
|
85 |
self.loader = DirectoryLoader(index, recursive=True)
|
86 |
|
87 |
+
def load_or_create_index(self, index: str, vectorstore=None):
|
88 |
# Load an existing index from disk or create a new one if not available
|
89 |
+
if vectorstore is not None:
|
90 |
+
self.search_index = vectorstore.client.from_documents(
|
91 |
+
self.loader.load_and_split(),
|
92 |
+
OpenAIEmbeddings(),
|
93 |
+
connection_args={"host": vectorstore.host, "port": vectorstore.port},
|
94 |
+
)
|
95 |
+
return
|
96 |
|
97 |
# Is pickle
|
98 |
+
if index is not None and "pkl" in index or "pickle" in index:
|
99 |
+
print("Loading path from pickle file: ", index, "...")
|
100 |
+
with open(index, "rb") as f:
|
101 |
self.search_index = pickle.load(f)
|
102 |
return
|
103 |
|
104 |
# Is directory
|
105 |
+
if index is not None and os.path.isdir(index):
|
106 |
print("Creating index...")
|
107 |
self.search_index = FAISS.from_documents(
|
108 |
self.loader.load_and_split(), OpenAIEmbeddings()
|
|
|
133 |
"impl": Bot,
|
134 |
"default": {
|
135 |
"model": "gpt-3.5-turbo",
|
|
|
136 |
"temperature": 0,
|
137 |
+
"index": "./index",
|
138 |
},
|
139 |
}
|
140 |
}
|
megabots/vectorstores.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Type, TypeVar
|
2 |
+
from langchain.vectorstores import Milvus, Qdrant
|
3 |
+
from abc import ABC
|
4 |
+
|
5 |
+
|
6 |
+
class MilvusVectorStore:
    """Bundle the connection details of a Milvus server with the langchain
    ``Milvus`` vectorstore class used to talk to it.

    Note that ``client`` holds the ``Milvus`` *class*, not an instance:
    consumers are expected to call e.g. ``self.client.from_documents(...)``
    themselves, passing ``host``/``port`` as connection arguments.
    """

    def __init__(self, host: str, port: int):
        # Network endpoint of the running Milvus instance.
        self.host, self.port = host, port
        # Deferred client: the class itself, instantiated later by callers.
        self.client = Milvus
|
11 |
+
|
12 |
+
|
13 |
+
# Registry of vectorstore backends this library can construct.
# Each entry maps a public backend name to its implementation class
# ("impl") and the keyword arguments used when the caller supplies
# none ("default").
SUPPORTED_VECTORSTORES = {
    "milvus": {
        "impl": MilvusVectorStore,
        "default": {"host": "localhost", "port": 19530},
    },
}
|
19 |
+
|
20 |
+
|
21 |
+
def vectorstore(name: str) -> MilvusVectorStore:
    """Build and return a vectorstore wrapper by its registered name.

    Args:
        name: Key into ``SUPPORTED_VECTORSTORES`` (currently only "milvus").

    Returns:
        An instance of the registered implementation class, constructed with
        that backend's default settings.

    Raises:
        RuntimeError: If ``name`` is ``None``.
        ValueError: If ``name`` is not a supported vectorstore.
    """

    if name is None:
        raise RuntimeError("Impossible to instantiate a vectorstore without a name.")

    if name not in SUPPORTED_VECTORSTORES:
        raise ValueError(f"Vectorstore {name} is not supported.")

    # Look the registry entry up once, and unpack the whole default-config
    # dict so future backends are free to require settings other than the
    # host/port pair Milvus happens to use.
    config = SUPPORTED_VECTORSTORES[name]
    return config["impl"](**config["default"])
|