Commit 56724c7 · Initial commit
Parent(s): 0
Files changed:
- .gitignore +1 -0
- README.md +45 -0
- qnabot/QnABot.py +85 -0
- qnabot/__init__.py +0 -0
- setup.py +23 -0
.gitignore
ADDED
@@ -0,0 +1 @@
.venv
README.md
ADDED
@@ -0,0 +1,45 @@
# QnA Bot

Create a question-answering bot over your docs with one line of code:

```python
from qnabot import QnABot
import os

os.environ["OPENAI_API_KEY"] = "my key"

bot = QnABot(directory="./mydata")
```

### Here's how it works

A high-level overview of what is happening under the hood:

```mermaid
sequenceDiagram
    actor User
    participant API
    participant LLM
    participant Vectorstore
    participant IngestionEngine
    participant DataLake
    autonumber

    Note over API, DataLake: Ingestion phase
    loop Every X time
        IngestionEngine ->> DataLake: Load documents
        DataLake -->> IngestionEngine: Return data
        IngestionEngine -->> IngestionEngine: Split documents and create embeddings
        IngestionEngine ->> Vectorstore: Store documents and embeddings
    end

    Note over API, DataLake: Generation phase

    User ->> API: Receive user question
    API ->> Vectorstore: Look up documents in the index relevant to the question
    API ->> API: Construct a prompt from the question and any relevant documents
    API ->> LLM: Pass the prompt to the model
    LLM -->> API: Get response from model
    API -->> User: Return response
```
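For context, here is a minimal sketch of the generation phase shown in the diagram, using the `get_answer` and `print_answer` methods defined in `qnabot/QnABot.py` below. Since `qnabot/__init__.py` is empty in this commit, the class is imported from the `qnabot.QnABot` module directly; the API key, data directory, and questions are placeholders.

```python
import os

# The class lives in qnabot/QnABot.py; __init__.py does not re-export it yet,
# so import it from the module directly.
from qnabot.QnABot import QnABot

os.environ["OPENAI_API_KEY"] = "my key"  # placeholder key

# Ingestion phase: load documents from ./mydata, split them, embed them,
# and store them in an in-memory FAISS index.
bot = QnABot(directory="./mydata")

# Generation phase: retrieve the k most similar chunks and pass them,
# together with the question, to the QA-with-sources chain.
answer = bot.get_answer("What do these documents cover?", k=2)
print(answer)

# Or let the bot print the answer itself.
bot.print_answer("What do these documents cover?")
```

The `k` argument maps directly onto `similarity_search(question, k=k)` in the class, so it controls how many document chunks are placed into the prompt.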
qnabot/QnABot.py
ADDED
@@ -0,0 +1,85 @@
# Import necessary libraries and modules
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader, S3DirectoryLoader
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.vectorstores.faiss import FAISS
import pickle
import os


class QnABot:
    def __init__(
        self,
        directory: str,
        index: str | None = None,
        model: str | None = None,
        temperature=0,
    ):
        # Initialize the QnABot by selecting a model, creating a loader,
        # and loading or creating an index
        self.select_model(model, temperature)
        self.create_loader(directory)
        self.load_or_create_index(index)

        # Load the question-answering chain for the selected model
        self.chain = load_qa_with_sources_chain(self.llm)

    def select_model(self, model: str | None, temperature: float):
        # Select and set the appropriate model based on the provided input
        if model is None or model == "gpt-3.5-turbo":
            print("Using model: gpt-3.5-turbo")
            self.llm = ChatOpenAI(temperature=temperature)

        if model == "text-davinci-003":
            print("Using model: text-davinci-003")
            self.llm = OpenAI(temperature=temperature)

    def create_loader(self, directory: str):
        # Create a loader based on the provided directory (either local or S3)
        if directory.startswith("s3://"):
            self.loader = S3DirectoryLoader(directory)
        else:
            self.loader = DirectoryLoader(directory, recursive=True)

    def load_or_create_index(self, index_path: str | None):
        # Load an existing index from disk or create a new one if not available
        if index_path is not None and os.path.exists(index_path):
            print("Loading index from disk...")
            with open(index_path, "rb") as f:
                self.search_index = pickle.load(f)
        else:
            print("Creating index...")
            self.search_index = FAISS.from_documents(
                self.loader.load_and_split(), OpenAIEmbeddings()
            )

    def save_index(self, index_path: str):
        # Save the index to the specified path
        with open(index_path, "wb") as f:
            pickle.dump(self.search_index, f)

    def print_answer(self, question, k=1):
        # Retrieve and print the answer to the given question
        input_documents = self.search_index.similarity_search(question, k=k)
        print(
            self.chain(
                {
                    "input_documents": input_documents,
                    "question": question,
                },
                return_only_outputs=True,
            )["output_text"]
        )

    def get_answer(self, question, k=1):
        # Retrieve the answer to the given question and return it
        input_documents = self.search_index.similarity_search(question, k=k)
        return self.chain(
            {
                "input_documents": input_documents,
                "question": question,
            },
            return_only_outputs=True,
        )["output_text"]
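A short usage sketch for the persistence hooks above: `save_index` pickles the FAISS index, and passing `index` to the constructor makes `load_or_create_index` reuse it instead of re-embedding the documents. The file path and data directory here are placeholder assumptions, not part of the commit.

```python
import os

from qnabot.QnABot import QnABot  # __init__.py is still empty in this commit

os.environ["OPENAI_API_KEY"] = "my key"  # placeholder key

INDEX_PATH = "qna_index.pickle"  # hypothetical location for the pickled index

# First run: the pickle does not exist yet, so QnABot builds the index from
# ./mydata; persist it so later runs can skip the embedding step.
bot = QnABot(directory="./mydata", index=INDEX_PATH)
if not os.path.exists(INDEX_PATH):
    bot.save_index(INDEX_PATH)

# Subsequent runs: load_or_create_index finds the pickle and loads it
# instead of calling the embeddings API again.
bot = QnABot(directory="./mydata", index=INDEX_PATH)
```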
qnabot/__init__.py
ADDED
File without changes
setup.py
ADDED
@@ -0,0 +1,23 @@
from setuptools import setup, find_packages

setup(
    name="qnabot",
    version="0.0.1",
    packages=find_packages(),
    install_requires=[
        # List your package's dependencies here, e.g.,
        # "numpy>=1.18",
    ],
    author="Megaklis Vasilakis",
    author_email="megaklis.vasilakis@gmail.com",
    description="Create a question-answering bot over your docs with one line of code.",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/momegas/qnabot",
    classifiers=[
        # Choose appropriate classifiers from
        # https://pypi.org/classifiers/
        "Development Status :: 4 - Beta",
    ],
)
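The `install_requires` list is left empty in this commit. Judging from the imports in `qnabot/QnABot.py`, a filled-in version might look roughly like the sketch below; the exact package names and the choice to leave versions unpinned are assumptions, not something this commit specifies.

```python
from setuptools import setup, find_packages

setup(
    name="qnabot",
    version="0.0.1",
    packages=find_packages(),
    # Assumed dependencies, inferred from the imports in qnabot/QnABot.py:
    install_requires=[
        "langchain",  # loaders, chains, and the FAISS vectorstore wrapper
        "openai",     # backs ChatOpenAI, OpenAI, and OpenAIEmbeddings
        "faiss-cpu",  # index library behind langchain's FAISS wrapper
        "boto3",      # used by S3DirectoryLoader for s3:// directories
    ],
)
```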