from pymongo import MongoClient
# Since the January 2024 LangChain package split, these imports moved out of the
# legacy `langchain` namespace (e.g. `from langchain.embeddings.openai import
# OpenAIEmbeddings` now raises an ImportError):
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.llms import OpenAI
from langchain.chains import RetrievalQA
import gradio as gr
from gradio.themes.base import Base
import os

DESCRIPTION = """\
# RAG - MongoDB Atlas Vector Search & OpenAI
This app provides a simple chat interface that augments OpenAI's answers with private information stored in a MongoDB Atlas cluster.<br>
Demo based on https://www.mongodb.com/developer/products/atlas/rag-atlas-vector-search-langchain-openai/

## Prerequisites
Create a free database called "langchain_demo" with a collection called "collection_of_text_blobs" in MongoDB Atlas (https://cloud.mongodb.com). After that, you have two options to populate it:

**Option 1**: run "load_data.py" / "load_data_from_PDF.py" locally to create the documents and their embeddings in MongoDB (see the sketch below)<br>
**Option 2**: import the JSON file "langchain_demo.collection_of_text_blobs.json"
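
For reference, option 1 boils down to something like the following (a sketch: the folder path, glob pattern, and key handling are assumptions to adapt to your setup):
```python
from pymongo import MongoClient
from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch

client = MongoClient("<your MongoDB Atlas URI>")
collection = client["langchain_demo"]["collection_of_text_blobs"]

# Load the local text files that should be embedded
loader = DirectoryLoader("./sample_files", glob="./*.txt", show_progress=True)
data = loader.load()

# Embed each document and write it, together with its embedding, into Atlas
vectorStore = MongoDBAtlasVectorSearch.from_documents(
    data,
    OpenAIEmbeddings(openai_api_key="<your OpenAI API key>"),
    collection=collection,
)
```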

## Dataset
A JSON document in MongoDB looks like the following (the PDF https://arxiv.org/pdf/2303.08774.pdf was also split and embedded):
```
{
  "_id": {
    "$oid": "657b1ffa30c8238be21fd930"
  },
  "text": "
  Alfred: Hi, can you explain to me how compression works in MongoDB?
  Bruce: Sure! MongoDB supports compression of data at rest. It uses either zlib or snappy compression algorithms at the collection level. When data is written, MongoDB compresses and stores it compressed. When data is read, MongoDB uncompresses it before returning it. Compression reduces storage space requirements.
  Alfred: Interesting, that's helpful to know. Can you also tell me how indexes are stored in MongoDB?
  Bruce: MongoDB indexes are stored in B-trees. The internal nodes of the B-trees contain keys that point to children nodes or leaf nodes. The leaf nodes contain references to the actual documents stored in the collection. Indexes are stored in memory and also written to disk. The in-memory B-trees provide fast access for queries using the index.
  Alfred: Ok that makes sense. Does MongoDB compress the indexes as well
  Bruce: Yes, MongoDB also compresses the index data using prefix compression. This compresses common prefixes in the index keys to save space. However, the compression is lightweight and focused on performance vs storage space. Index compression is enabled by default.
  Alfred: Great, that's really helpful context on how indexes are handled. One last question - when I query on a non-indexed field, how does MongoDB actually perform the scanning?
  Bruce: MongoDB performs a collection scan if a query does not use an index. It will scan every document in the collection in memory and on disk to select the documents that match the query. This can be resource intensive for large collections without indexes, so indexing improves query performance.
  Alfred: Thank you for the detailed explanations
  Bruce, I really appreciate you taking the time to walk through how compression and indexes work under the hood in MongoDB. Very helpful!Bruce: You're very welcome! I'm glad I could explain the technical details clearly. Feel free to reach out if you have any other MongoDB questions.",
  "embedding": [...
  ],
  "source": "sample_files/chat_conversation.txt"
}
```
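
Note that the vector search expects an Atlas Search index (named "default" in this app) on the `embedding` field. A `knnVector` definition along these lines should work (1536 dimensions matches OpenAI's text-embedding-ada-002; treat the exact JSON as an assumption and check the Atlas docs for your cluster version):
```json
{
  "mappings": {
    "dynamic": true,
    "fields": {
      "embedding": {
        "dimensions": 1536,
        "similarity": "cosine",
        "type": "knnVector"
      }
    }
  }
}
```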
## Demo
"""

def query_data(query, openai_api_key, mongo_uri):
    os.environ["OPENAI_API_KEY"] = openai_api_key
    os.environ["MONGO_URI"] = mongo_uri

    client = MongoClient(mongo_uri)
    dbName = "langchain_demo"
    collectionName = "collection_of_text_blobs"
    collection = client[dbName][collectionName]

    # Define the text embedding model
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Initialize the Vector Store
    vectorStore = MongoDBAtlasVectorSearch(collection, embeddings, index_name="default")

    # Convert question to vector using OpenAI embeddings
    # Perform Atlas Vector Search using Langchain's vectorStore
    # similarity_search returns MongoDB documents most similar to the query    

    docs = vectorStore.similarity_search(query, k=1)  # the kwarg is lowercase k; K=1 would be silently ignored
    as_output = docs[0].page_content

    # Leveraging Atlas Vector Search paired with Langchain's QARetriever

    # Define the LLM that we want to use -- note that this is the Language Generation Model and NOT an Embedding Model
    # If no model is specified (as in the code below), LangChain's OpenAI wrapper
    # falls back to its default OpenAI completion model (GPT-3.5-turbo as of August 30, 2023)
    
    llm = OpenAI(openai_api_key=openai_api_key, temperature=0)


    # Get VectorStoreRetriever: Specifically, Retriever for MongoDB VectorStore.
    # Implements _get_relevant_documents which retrieves documents relevant to a query.
    retriever = vectorStore.as_retriever()

    # Load "stuff" documents chain. Stuff documents chain takes a list of documents,
    # inserts them all into a prompt and passes that prompt to an LLM.

    qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

    # Execute the chain

    retriever_output = qa.run(query)  # note: Chain.run is deprecated in newer LangChain in favor of .invoke()


    # Return Atlas Vector Search output, and output generated using RAG Architecture
    return as_output, retriever_output

# Create a web interface for the app, using Gradio

with gr.Blocks(theme=Base(), title="MongoDB Atlas Vector Search + RAG Architecture") as demo:
    gr.Markdown(DESCRIPTION)
    openai_api_key = gr.Textbox(label="OpenAI API Key (sk-...)", type="password", lines=1)
    mongo_uri = gr.Textbox(label="Mongo Atlas URI (mongodb+srv://..)", type="password", lines=1)
    textbox = gr.Textbox(label="Enter your Question related to JSON example or PDF:")
    with gr.Row():
        button = gr.Button("Submit", variant="primary")
    with gr.Column():
        output1 = gr.Textbox(lines=1, max_lines=10, label="Atlas Vector Search output (document field as is):")
        output2 = gr.Textbox(lines=1, max_lines=10, label="Atlas Vector Search output + Langchain's RetrievalQA + OpenAI LLM:")

    # Call the query_data function upon clicking the Submit button
    button.click(query_data,
                 inputs=[textbox, openai_api_key, mongo_uri],
                 outputs=[output1, output2]
                )

demo.launch()
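
# To run locally (a sketch; the package list is an assumption -- pin versions as needed):
#   pip install pymongo langchain langchain-community langchain-openai gradio
#   python app.py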