Spaces:

angelesteban00
/

angelesteban00_hg

Sleeping

App Files Files Community

angelesteban00_hg / app.py

angelesteban00

Update app.py

bc5f544 10 months ago

raw

history blame

3.76 kB

	from pymongo import MongoClient
	# error since Jan 2024, from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain_community.embeddings import OpenAIEmbeddings
	# error since Jan 2024, from langchain.vectorstores import MongoDBAtlasVectorSearch
	from langchain_community.vectorstores import MongoDBAtlasVectorSearch
	# error since Jan 2024, from langchain.document_loaders import DirectoryLoader
	from langchain_community.document_loaders import DirectoryLoader
	# error since Jan 2024, from langchain.llms import OpenAI
	from langchain_community.llms import OpenAI
	from langchain.chains import RetrievalQA
	import gradio as gr
	from gradio.themes.base import Base
	#import key_param
	import os

	def query_data(query: str, openai_api_key: str, mongo_uri: str):
	    """Answer ``query`` with plain Atlas Vector Search and with a RAG chain.

	    Args:
	        query: The user's natural-language question.
	        openai_api_key: OpenAI key used for both the embedding model and
	            the language model.
	        mongo_uri: Connection string of the MongoDB Atlas cluster that
	            holds ``langchain_demo.collection_of_text_blobs``.

	    Returns:
	        A ``(as_output, retriever_output)`` tuple of strings:
	        ``as_output`` is the ``page_content`` of the single most similar
	        stored document, returned as is; ``retriever_output`` is the
	        answer produced by the RetrievalQA chain. When the vector search
	        finds nothing, both elements carry a short "no documents" message
	        and the LLM is not called.
	    """
	    no_match_message = "No matching documents were found for this question."

	    client = MongoClient(mongo_uri)
	    try:
	        collection = client["langchain_demo"]["collection_of_text_blobs"]

	        # Text embedding model used to turn the question into a vector.
	        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

	        # Vector store backed by the Atlas collection and its "default" index.
	        vectorStore = MongoDBAtlasVectorSearch(collection, embeddings, index_name="default")

	        # The keyword is lowercase ``k``; an uppercase ``K`` is not the
	        # result-count parameter, so the limit of one was never applied.
	        docs = vectorStore.similarity_search(query, k=1)
	        if not docs:
	            # Avoid an IndexError (and a pointless LLM call) on an empty result.
	            return no_match_message, no_match_message
	        as_output = docs[0].page_content

	        # Language generation model (not the embedding model).
	        # NOTE(review): 'gpt-4-1106-preview' is a chat model while ``OpenAI``
	        # is the completion-style wrapper -- confirm the wrapper routes it
	        # correctly in the installed langchain_community version.
	        llm = OpenAI(openai_api_key=openai_api_key, temperature=0, model_name='gpt-4-1106-preview')

	        # Retriever view over the same vector store, consumed by the chain.
	        retriever = vectorStore.as_retriever()

	        # "stuff" chain: all retrieved documents are inserted into one prompt.
	        # The model is configured on ``llm``; ``model_name`` is not a
	        # RetrievalQA field, so it is no longer forwarded here.
	        qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

	        retriever_output = qa.run(query)

	        # Atlas Vector Search output, then the RAG-generated output.
	        return as_output, retriever_output
	    finally:
	        # One client is created per call; release its connection pool.
	        client.close()

	# Create a web interface for the app, using Gradio.
	with gr.Blocks(theme=Base(), title="Question Answering App using Vector Search + RAG") as demo:
	    gr.Markdown(
	        """
	        # Question Answering App using Atlas Vector Search + RAG Architecture
	        """)

	    # Both credentials are secrets (the Mongo URI embeds user/password), so
	    # they are masked. The expected prefix is shown as a placeholder rather
	    # than a pre-filled value, which would be hidden by the masking.
	    openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="sk-", type="password", lines=1)
	    mongo_uri = gr.Textbox(label="Mongo URI", placeholder="mongodb+srv://", type="password", lines=1)
	    textbox = gr.Textbox(label="Enter your Question:")

	    with gr.Row():
	        button = gr.Button("Submit", variant="primary")
	    with gr.Column():
	        output1 = gr.Textbox(lines=1, max_lines=10, label="Output with just Atlas Vector Search (returns text field as is):")
	        output2 = gr.Textbox(lines=1, max_lines=10, label="Output generated by chaining Atlas Vector Search to Langchain's RetrieverQA + OpenAI LLM:")

	    # Call query_data upon clicking the Submit button; the input order
	    # matches query_data's (query, openai_api_key, mongo_uri) parameters.
	    button.click(
	        query_data,
	        inputs=[textbox, openai_api_key, mongo_uri],
	        outputs=[output1, output2],
	    )

	demo.launch()