# codepath / rag.py
# Provenance: uploaded by RajMoon ("Upload 6 files", commit 0539069, verified)
import os
from langchain_core.runnables.base import RunnableSequence
from langchain_core.runnables.passthrough import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.vectorstores import FAISS
# Required packages (install once):
#   pip install -q -U langchain langchain-openai langchain-community
#   pip install -q faiss-cpu tiktoken
class RAGModel:
    """Retrieval-Augmented Generation pipeline over a CSV dataset.

    On construction, the rows of a CSV file are loaded, chunked, embedded
    with OpenAI embeddings (cached on local disk so repeated runs don't
    re-pay for identical text), and indexed into a FAISS vector store.
    The store is wired into a LangChain runnable chain:
    retriever -> chat prompt -> chat model -> string parser.
    """

    def __init__(self, api_key, csv_file="imdb_datasets.csv",
                 chunk_size=1000, chunk_overlap=100):
        """Build the vector store and the RAG chain.

        Args:
            api_key: OpenAI API key, used for both embeddings and chat.
            csv_file: Path to the CSV dataset to index (one document per row).
            chunk_size: Maximum characters per text chunk.
            chunk_overlap: Characters of overlap between consecutive chunks.
        """
        self.api_key = api_key
        self.vector_store = self._build_vector_store(csv_file, chunk_size, chunk_overlap)
        self.runnable_chain = self._build_chain()

    def _build_vector_store(self, csv_file, chunk_size, chunk_overlap):
        """Load the CSV, split it into chunks, embed, and index into FAISS."""
        csv_data = CSVLoader(csv_file).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        chunked_documents = text_splitter.split_documents(csv_data)
        print(f"Number of documents: {len(chunked_documents)}")

        embedding_model = OpenAIEmbeddings(
            model="text-embedding-3-large", openai_api_key=self.api_key
        )
        print("Created embeddings")
        # Cache embedding vectors on local disk, namespaced by model name,
        # so identical text is never re-embedded (and re-billed).
        cached_embedder = CacheBackedEmbeddings.from_bytes_store(
            embedding_model, LocalFileStore("./cache/"), namespace=embedding_model.model
        )
        print("Created cache backed embeddings")

        vector_store = FAISS.from_documents(chunked_documents, cached_embedder)
        # Persist the index so it can be reloaded without re-embedding.
        vector_store.save_local("faiss_index")
        return vector_store

    def _build_chain(self):
        """Assemble the retriever -> prompt -> model -> parser runnable chain."""
        retriever = self.vector_store.as_retriever()
        # The prompt has slots for the retrieved context and the user's question.
        prompt_template = ChatPromptTemplate.from_messages(
            [
                ("system", "You are an excellent movie critic who always includes great movie recommendations in your response. If the answer is not in the context let the user know "),
                ("human", "Using this context: {context}, please answer this question: {question}"),
            ]
        )
        chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, api_key=self.api_key)
        return (
            {
                "context": retriever,
                "question": RunnablePassthrough(),
            }
            | prompt_template
            | chat_model
            | StrOutputParser()
        )

    def query(self, question) -> str:
        """Answer *question* using context retrieved from the vector store.

        Args:
            question: The user's question, passed through to the retriever
                and the prompt.

        Returns:
            The model's answer as a plain string.
        """
        print(f"Querying the RAG instance with the question: {question}")
        # StrOutputParser already yields a complete string, so the result
        # is returned directly (the old ''.join(...) was a no-op).
        return self.runnable_chain.invoke(question)
# def main():
# Create an instance of RAG class
# api_key = os.getenv("OPENAI_API_KEY")
# rag = RAGModel(api_key=api_key)
# while True:
# # Take input from command line
# question = input("Enter your question (or type 'exit' to quit): ")
# # Check if user wants to exit
# if question.lower() == "exit":
# break
# # Query the RAG instance
# answer = rag.query(question)
# # Print the answer
# print("Answer:", answer)
# if __name__ == "__main__":
# main()