momegas commited on
Commit
56724c7
β€’
0 Parent(s):

πŸ‘€ Initial commit

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +45 -0
  3. qnabot/QnABot.py +85 -0
  4. qnabot/__init__.py +0 -0
  5. setup.py +23 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # QnA Bot
2
+
3
+ Create a question answering over docs bot with one line of code:
4
+
5
+ ```python
6
+ from qnabot import QnABot
7
+ import os
8
+
9
+ os.environ["OPENAI_API_KEY"] = "my key"
10
+
11
+ bot = QnABot(directory="./mydata")
12
+ ```
13
+
14
+ ### Here's how it works
15
+
16
+ A high-level overview of what is happening under the hood:
17
+
18
+ ```mermaid
19
+ sequenceDiagram
20
+ actor User
21
+ participant API
22
+ participant LLM
23
+ participant Vectorstore
24
+ participant IngestionEngine
25
+ participant DataLake
26
+ autonumber
27
+
28
+ Note over API, DataLake: Ingestion phase
29
+ loop Every X time
30
+ IngestionEngine ->> DataLake: Load documents
31
+ DataLake -->> IngestionEngine: Return data
32
+ IngestionEngine -->> IngestionEngine: Split documents and Create embeddings
33
+ IngestionEngine ->> Vectorstore: Store documents and embeddings
34
+ end
35
+
36
+ Note over API, DataLake: Generation phase
37
+
38
+ User ->> API: Send question
39
+ API ->> Vectorstore: Lookup documents in the index relevant to the question
40
+ API ->> API: Construct a prompt from the question and any relevant documents
41
+ API ->> LLM: Pass the prompt to the model
42
+ LLM -->> API: Get response from model
43
+ API -->> User: Return response
44
+
45
+ ```
qnabot/QnABot.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries and modules
2
+ from langchain.llms import OpenAI
3
+ from langchain.chat_models import ChatOpenAI
4
+ from langchain.embeddings import OpenAIEmbeddings
5
+ from langchain.document_loaders import DirectoryLoader, S3DirectoryLoader
6
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
7
+ from langchain.vectorstores.faiss import FAISS
8
+ import pickle
9
+ import os
10
+
11
+
12
+ class QnABot:
13
+ def __init__(
14
+ self,
15
+ directory: str,
16
+ index: str | None = None,
17
+ model: str | None = None,
18
+ temperature=0,
19
+ ):
20
+ # Initialize the QnABot by selecting a model, creating a loader,
21
+ # and loading or creating an index
22
+ self.select_model(model, temperature)
23
+ self.create_loader(directory)
24
+ self.load_or_create_index(index)
25
+
26
+ # Load the question-answering chain for the selected model
27
+ self.chain = load_qa_with_sources_chain(self.llm)
28
+
29
+ def select_model(self, model: str | None, temperature: float):
30
+ # Select and set the appropriate model based on the provided input
31
+ if model is None or model == "gpt-3.5-turbo":
32
+ print("Using model: gpt-3.5-turbo")
33
+ self.llm = ChatOpenAI(temperature=temperature)
34
+
35
+ if model == "text-davinci-003":
36
+ print("Using model: text-davinci-003")
37
+ self.llm = OpenAI(temperature=temperature)
38
+
39
+ def create_loader(self, directory: str):
40
+ # Create a loader based on the provided directory (either local or S3)
41
+ if directory.startswith("s3://"):
42
+ self.loader = S3DirectoryLoader(directory)
43
+ else:
44
+ self.loader = DirectoryLoader(directory, recursive=True)
45
+
46
+ def load_or_create_index(self, index_path: str | None):
47
+ # Load an existing index from disk or create a new one if not available
48
+ if index_path is not None and os.path.exists(index_path):
49
+ print("Loading path from disk...")
50
+ with open(index_path, "rb") as f:
51
+ self.search_index = pickle.load(f)
52
+ else:
53
+ print("Creating index...")
54
+ self.search_index = FAISS.from_documents(
55
+ self.loader.load_and_split(), OpenAIEmbeddings()
56
+ )
57
+
58
+ def save_index(self, index_path: str):
59
+ # Save the index to the specified path
60
+ with open(index_path, "wb") as f:
61
+ pickle.dump(self.search_index, f)
62
+
63
+ def print_answer(self, question, k=1):
64
+ # Retrieve and print the answer to the given question
65
+ input_documents = self.search_index.similarity_search(question, k=k)
66
+ print(
67
+ self.chain(
68
+ {
69
+ "input_documents": input_documents,
70
+ "question": question,
71
+ },
72
+ return_only_outputs=True,
73
+ )["output_text"]
74
+ )
75
+
76
+ def get_answer(self, question, k=1):
77
+ # Retrieve the answer to the given question and return it
78
+ input_documents = self.search_index.similarity_search(question, k=k)
79
+ return self.chain(
80
+ {
81
+ "input_documents": input_documents,
82
+ "question": question,
83
+ },
84
+ return_only_outputs=True,
85
+ )["output_text"]
qnabot/__init__.py ADDED
File without changes
setup.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from setuptools import setup, find_packages

# Package metadata for qnabot. The classifier below was previously an
# unquoted bare expression (`Development Status :: 4 - Beta`), which is a
# SyntaxError — classifiers must be string literals.
setup(
    name="qnabot",
    version="0.0.1",
    packages=find_packages(),
    install_requires=[
        # List your package's dependencies here, e.g.,
        # "numpy>=1.18",
    ],
    author="Megaklis Vasilakis",
    author_email="megaklis.vasilakis@gmail.com",
    description="Create a question answering over docs bot with one line of code.",
    # Explicit encoding so the build does not depend on the platform default.
    long_description=open("README.md", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/momegas/qnabot",
    classifiers=[
        # Choose appropriate classifiers from
        # https://pypi.org/classifiers/
        "Development Status :: 4 - Beta",
    ],
)