"""Gradio demo: question answering with MongoDB Atlas Vector Search and RAG.

The app takes an OpenAI API key, a MongoDB Atlas connection URI and a
question.  It shows two results side by side: the raw text of the document
that Atlas Vector Search ranks closest to the question, and an answer
generated by a LangChain ``RetrievalQA`` chain that uses the same vector
store as its retriever.
"""

import os  # noqa: F401  (kept from the original file; query_data no longer needs it)
from typing import Tuple

import gradio as gr
from gradio.themes.base import Base
from langchain.chains import RetrievalQA
# NOTE: since January 2024 the legacy import paths
# (langchain.embeddings.openai, langchain.vectorstores,
# langchain.document_loaders, langchain.llms) raise errors; the
# langchain_openai / langchain_community packages below replace them.
from langchain_community.document_loaders import DirectoryLoader  # noqa: F401
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings
from pymongo import MongoClient

# Location of the embedded documents and the name of the Atlas search index.
DB_NAME = "langchain_demo"
COLLECTION_NAME = "collection_of_text_blobs"
INDEX_NAME = "default"

# Shown in the first output box when the vector search returns nothing.
NO_MATCH_MESSAGE = "No matching document found."


def query_data(query: str, openai_api_key: str, mongo_uri: str) -> Tuple[str, str]:
    """Answer ``query`` using Atlas Vector Search alone and via a RAG chain.

    Args:
        query: The user's question.
        openai_api_key: OpenAI API key, passed explicitly to both the
            embedding model and the LLM.
        mongo_uri: MongoDB connection string for the cluster that holds
            the ``langchain_demo.collection_of_text_blobs`` collection.

    Returns:
        A ``(vector_search_text, rag_answer)`` tuple:
        ``vector_search_text`` is the ``page_content`` of the single most
        similar document (or ``NO_MATCH_MESSAGE`` if the search returned no
        documents); ``rag_answer`` is the output of the ``RetrievalQA``
        chain for the same question.
    """
    # The credentials are deliberately NOT written to os.environ: the
    # environment is shared by every request served by this process, so
    # storing per-user secrets there would expose them to later requests.
    # Both LangChain objects below receive the key explicitly instead.

    # Use the client as a context manager so its connections are released
    # when the request finishes instead of leaking one pool per click.
    with MongoClient(mongo_uri) as client:
        collection = client[DB_NAME][COLLECTION_NAME]

        # Text embedding model used to turn the question into a vector.
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # Vector store backed by the Atlas collection and its search index.
        vector_store = MongoDBAtlasVectorSearch(
            collection,
            embeddings,
            index_name=INDEX_NAME,
        )

        # similarity_search embeds the question and returns the most similar
        # documents.  The result-count keyword is lower-case ``k``.
        docs = vector_store.similarity_search(query, k=1)
        # Guard against an empty result instead of raising IndexError.
        as_output = docs[0].page_content if docs else NO_MATCH_MESSAGE

        # Language generation model (not an embedding model).  No model name
        # is given, so the LangChain OpenAI wrapper's default is used.
        llm = OpenAI(openai_api_key=openai_api_key, temperature=0)

        # Retriever view over the same vector store.
        retriever = vector_store.as_retriever()

        # The "stuff" chain type inserts all retrieved documents into a
        # single prompt and passes that prompt to the LLM.
        qa = RetrievalQA.from_chain_type(
            llm,
            chain_type="stuff",
            retriever=retriever,
        )

        # Run the chain while the MongoDB client is still open.
        retriever_output = qa.run(query)

    # Raw vector-search output first, RAG-generated answer second.
    return as_output, retriever_output


# Web interface for the app, built with Gradio.
with gr.Blocks(
    theme=Base(),
    title="MongoDB Atlas Vector Search + RAG Architecture",
) as demo:
    gr.Markdown(
        """
        # MongoDB Atlas Vector Search + RAG Architecture
        """
    )
    openai_api_key = gr.Textbox(
        label="OpenAI API Key (sk-...)", type="password", lines=1
    )
    mongo_uri = gr.Textbox(
        label="Mongo Atlas URI (mongodb+srv://..)", type="password", lines=1
    )
    textbox = gr.Textbox(label="Enter your Question:")
    with gr.Row():
        button = gr.Button("Submit", variant="primary")
    with gr.Column():
        output1 = gr.Textbox(
            lines=1,
            max_lines=10,
            label="Atlas Vector Search output (document field as is):",
        )
        output2 = gr.Textbox(
            lines=1,
            max_lines=10,
            label="Atlas Vector Search output + Langchain's RetrieverQA + OpenAI LLM:",
        )

    # Call query_data when the Submit button is clicked.
    button.click(
        query_data,
        inputs=[textbox, openai_api_key, mongo_uri],
        outputs=[output1, output2],
    )

demo.launch()