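"""Retrieval-augmented QA over the NVIDIA filings PDF.

The script chunks the document, embeds the chunks with cached OpenAI embeddings,
stores them in a FAISS index, answers a couple of test questions through a
LangChain retrieval chain, and (optionally, in the commented-out block at the
bottom) evaluates the chain with RAGAS.
"""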
import os

from dotenv import load_dotenv
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from datasets import Dataset
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

TEST_SIZE = 10
CACHE_STORE = "data/cache/"

# Load the environment variables to gain access to OpenAI, WandB, and other APIs.
load_dotenv()

# Enable LangSmith tracing for this project.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "midterm_chainlit"

# Set the embedding and completion models.
embedding_model = "text-embedding-3-small"
llm_model_name = "gpt-3.5-turbo"

def get_cached_embedder(cache_store_path: str, embedding_model: str):
    """Cache the embeddings and return the embedder."""
    local_file_store = LocalFileStore(cache_store_path)
    embeddings = OpenAIEmbeddings(
        model=embedding_model,
    )
    return CacheBackedEmbeddings.from_bytes_store(
        embeddings, local_file_store, namespace=embeddings.model
    )

def get_documents(test: bool = False):
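    """Return the source documents: a tiny in-memory string when test=True, otherwise the NVIDIA filings PDF."""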
    if test:
        return "harrison worked at Kensho"
    else:
        # Load the document.
        loader = PyMuPDFLoader(
            "data/nvidia_filings.pdf",
        )
        return loader.load()

def chunk_and_store(documents, cached_embedder):
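    """Split the documents into chunks, embed them, and persist a FAISS index to disk."""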
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=400,
        chunk_overlap=50,
    )
    if isinstance(documents, str):
        chunks = splitter.split_text(documents)
        vector_store = FAISS.from_texts(chunks, cached_embedder)
    else:
        # Split the document into chunks.
        chunks = splitter.split_documents(documents)
        # Store the embeddings.
        vector_store = FAISS.from_documents(chunks, cached_embedder)
    vector_store.save_local("data/vector_store.index")
    print("Vector store index saved on disk.")
    print(f"Number of chunks: {len(chunks)}")
    return vector_store

def get_store():
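    """Load the FAISS vector store from disk, building and saving it first if it does not exist yet."""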
    if not os.path.exists("data/vector_store.index"):
        # Build the index and return it directly so callers never receive None.
        return chunk_and_store(get_documents(), get_cached_embedder(CACHE_STORE, embedding_model))
    print("Loading the vector store from the disk.")
    return FAISS.load_local(
        "data/vector_store.index",
        get_cached_embedder(CACHE_STORE, embedding_model),
        allow_dangerous_deserialization=True,
    )

def get_chain(retriever):
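    """Build the retrieval chain: fetch context, fill the prompt, call the LLM, and parse the output to a string."""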
    return (
        RunnableParallel(context=retriever, question=RunnablePassthrough())
        | prompt
        | primary_qa_llm
        | StrOutputParser()
    )

def retrieve_and_answer(questions: list, retriever):
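    """Answer each question with the retrieval chain and return the answers in order."""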
    chain = get_chain(retriever)
    answers = []
    for question in questions:
        answers.append(chain.invoke(question))
    return answers

# Load the source documents (only needed here for the RAGAS evaluation block below).
documents = get_documents(test=False)

# Define the test questions.
question_1 = "Who is the E-VP, Operations - and how old are they?"
question_2 = "what is the gross carrying amount of Total Amortizable Intangible Assets for Jan 29, 2023?"
questions = [question_1, question_2]

# Define the retrieval prompt.
retrieval_prompt_template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I cannot answer the question with the context provided.':
Context: {context}
Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(retrieval_prompt_template)

# Define the model.
primary_qa_llm = ChatOpenAI(
    model_name=llm_model_name,
    temperature=0.0,
    streaming=True,
)

store = get_store()
retriever = store.as_retriever()
chain = get_chain(retriever)

for answer in retrieve_and_answer(questions, retriever):
    print(answer)
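
# Optional: RAGAS synthetic test set generation and evaluation (left commented out).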
#
# eval_documents = documents
#
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=1500,
#     chunk_overlap=400
# )
#
# eval_documents = text_splitter.split_documents(eval_documents)
#
# generator = TestsetGenerator.with_openai()
#
# test_set = generator.generate_with_langchain_docs(
#     eval_documents,
#     test_size=TEST_SIZE,
#     distributions={simple: 1},
# )
#
# test_df = test_set.to_pandas()
# test_questions = test_df["question"].values.tolist()
# test_ground_truths = test_df["ground_truth"].values.tolist()
#
# ragas_answers = []
# ragas_contexts = []
#
# eval_chain = get_chain(retriever)
# for question in test_questions:
#     # The chain takes the raw question string and returns a plain string answer.
#     ragas_answers.append(eval_chain.invoke(question))
#     # Collect the retrieved contexts separately for the RAGAS metrics.
#     ragas_contexts.append([doc.page_content for doc in retriever.get_relevant_documents(question)])
#
# response_dataset = Dataset.from_dict({
#     "question": test_questions,
#     "answer": ragas_answers,
#     "contexts": ragas_contexts,
#     "ground_truth": test_ground_truths,
# })
#
# metrics = [
#     faithfulness,
#     answer_relevancy,
#     context_recall,
#     context_precision,
#     answer_correctness,
# ]
#
# results = evaluate(response_dataset, metrics)
#
# results_df = results.to_pandas()
#
# print(results_df)