Spaces:

Leopat
/

thesis_chat_with_history_books

Sleeping

App Files Files

thesis_chat_with_history_books / finalthesis /rag_chains.py

Leopat

upload src files

3b4f6eb verified 9 months ago

raw

history blame

7.64 kB

	"""
	Python module defining the different chains that will be tested in the thesis.
	"""

	from langchain_openai import ChatOpenAI
	from langchain_openai import OpenAIEmbeddings
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_chroma import Chroma

	from langchain.chains import create_retrieval_chain
	from langchain.chains.combine_documents import create_stuff_documents_chain

	from langchain_core.retrievers import BaseRetriever
	from langchain_core.language_models.llms import BaseLLM
	from langchain_core.output_parsers.base import BaseOutputParser
	from langchain_core.runnables import RunnablePassthrough, RunnableParallel
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.documents import Document

	# System instruction shared by the QA prompts. The ``{context}`` placeholder
	# sits right after the "book extracts" sentence it refers to.
	BASE_SYSTEM_PROMPT: str = (
	    "You are an assistant for question-answering tasks over books. "
	    "Only use the following book extracts to answer the question. "
	    "If you don't know the answer, say that you don't know. "
	    "\n\n"
	    "{context}"
	)

	# Default question-answering prompt: the shared system instruction followed by
	# the user's question. Reusing BASE_SYSTEM_PROMPT keeps a single copy of the
	# instruction text so the two definitions cannot drift apart.
	BASE_QA_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
	    [
	        ("system", BASE_SYSTEM_PROMPT),
	        ("human", "{question}"),
	    ]
	)

	# # define prompt
	# BASE_SYSTEM_PROMPT: str = (
	# "You are an assistant for question-answering tasks over books. "
	# "Only use the following book extracts to answer the question. "
	# "If you don't know the answer, say that you don't know. "
	# "\n\n"
	# "{context}"
	# )


	def format_docs(docs):
	    """Join the ``page_content`` of every document, separated by a blank line."""
	    page_texts = [document.page_content for document in docs]
	    separator = "\n\n"
	    return separator.join(page_texts)

	def build_naive_rag_chain(
	    retriever: BaseRetriever,
	    llm: BaseLLM,
	    retrieval_prompt: ChatPromptTemplate = BASE_QA_PROMPT,
	    output_parser: BaseOutputParser = StrOutputParser()
	):
	    """Build a retrieve-then-answer chain that also returns its sources.

	    Args:
	        retriever: Runnable that receives the chain input and returns the
	            documents stored under ``context``.
	        llm: Model that receives the rendered prompt.
	        retrieval_prompt: Prompt that is fed the ``context`` and ``question``
	            keys. Defaults to ``BASE_QA_PROMPT``.
	        output_parser: Parser applied to the model output. The default
	            instance is created once, when the function is defined.

	    Returns:
	        A runnable whose output holds ``context`` (the retriever output),
	        ``question`` (the unchanged input) and ``answer`` (the parsed model
	        output).
	    """
	    # Answer branch: replace the document list under "context" with one
	    # formatted string, then run prompt -> model -> parser.
	    answer_chain = (
	        RunnablePassthrough.assign(
	            context=lambda inputs: format_docs(inputs["context"])
	        )
	        | retrieval_prompt
	        | llm
	        | output_parser
	    )

	    # The retriever and the pass-through both receive the same input; the
	    # answer is added on top, so the retrieved documents stay in the output.
	    chain_with_sources = RunnableParallel(
	        {"context": retriever, "question": RunnablePassthrough()}
	    ).assign(answer=answer_chain)

	    return chain_with_sources

	class RATChain:
	    """Draft an answer, then revise it once per paragraph using retrieval.

	    The model first answers the question in several paragraphs. For each
	    paragraph a retrieval query is generated, the retriever is called with
	    it, and the current answer is revised against the retrieved information.
	    """

	    # NOTE(review): this is a regular (non-raw) string, so the ``\n\n`` in the
	    # text below becomes two real newlines instead of being shown literally to
	    # the model -- confirm that this is intended.
	    THOUGHTS_PROMPT_TEMPLATE: str = """
	IMPORTANT:
	Answer this question with step-by-step thoughts.
	Split your different thoughts with \n\n to structure your answer into several paragraphs.

	Only reply to the question directly.
	DO NOT add additional explanations or information that is not related to the question unless you are asked to.
	"""
	    THOUGHTS_PROMPT = ChatPromptTemplate.from_messages([
	        ("system", THOUGHTS_PROMPT_TEMPLATE),
	        ("user", "{question}")
	    ])

	    # NOTE(review): adjacent string literals are concatenated without any
	    # separator, so several sentences run together (e.g. "textbook.Make" and
	    # "IMPORTANTJust"). The text is left untouched here because changing it
	    # changes what the model sees.
	    GENERATE_QUERY_PROMPT = ChatPromptTemplate.from_messages([
	        ("user",
	         "I want to verify the content correctness of the given question. "
	         "Summarize the main points of the content and provide a query that I can use "
	         "to retrive information from a textbook."
	         "Make the query as relevant as possible to the last content."
	         "IMPORTANT"
	         "Just output the query directly. DO NOT add additional explanations or introducement "
	         "in the answer unless you are asked to."
	         "CONTENT: {content}"
	         )
	    ])

	    # NOTE(review): same concatenation caveat as GENERATE_QUERY_PROMPT; the
	    # blank lines between the literals below do not add any whitespace to the
	    # resulting prompt text.
	    REVISE_ANSWER_PROMPT = ChatPromptTemplate.from_messages(
	        [
	            ("user",
	             "Verify the answer accoridng ot the retrieved information, "
	             "while keeping the initial question in mind. "

	             "If you find any mistakes, correct them."
	             "If you find any missing information, add them."
	             "If you find any irrelevant information, remove them."
	             "If you find the answer is correct and does not need improvement, output the original answer."

	             "IMPORTANT"
	             "Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structual for understanding."
	             "Add more details from retrieved text to the answer."
	             "Split the paragraphs with \n\n characters."
	             "Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to."


	             "INITIAL QUESTION:{question}"

	             "ANSWER:{answer}"
	             "\n\n"
	             "retrieved information={retrieved_info}"

	             )
	        ])

	    @staticmethod
	    def split_thoughts(thoughts: str) -> list[str]:
	        """Split *thoughts* on blank lines into paragraphs.

	        Empty or whitespace-only paragraphs are dropped so that no query,
	        retrieval and revision round is spent on empty content.
	        """
	        return [part for part in thoughts.split("\n\n") if part.strip()]

	    @staticmethod
	    def get_page_content(docs: list[list[Document]]) -> list[list[str]]:
	        """Return the ``page_content`` of every document, keeping the nesting."""
	        return [[doc.page_content for doc in doc_list] for doc_list in docs]

	    def __init__(self, retriever: BaseRetriever, llm: BaseLLM):
	        """Store the retriever and the model shared by all sub-chains."""
	        self.retriever = retriever
	        self.llm = llm

	    def get_initial_thought_chain(self):
	        """Chain producing the initial multi-paragraph answer as a string."""
	        return self.THOUGHTS_PROMPT | self.llm | StrOutputParser()

	    def get_revise_answer_chain(self):
	        """Chain revising an answer against retrieved information."""
	        return self.REVISE_ANSWER_PROMPT | self.llm | StrOutputParser()

	    def get_generate_query_chain(self):
	        """Chain turning a paragraph of content into a retrieval query."""
	        return self.GENERATE_QUERY_PROMPT | self.llm | StrOutputParser()

	    def iteratively_improve_thoughts(self, question: str, thoughts: str):
	        """Revise *thoughts* once per paragraph using retrieved information.

	        Args:
	            question: The original user question.
	            thoughts: The initial answer; it is split into paragraphs with
	                ``split_thoughts``.

	        Returns:
	            A dict with the keys ``question``, ``splited_thoughts``,
	            ``queries``, ``context`` (one retriever result per paragraph),
	            ``responses`` (the answer after each revision) and ``answer``
	            (the final answer; equal to *thoughts* when there is no
	            paragraph to process).
	        """
	        splited_thoughts = self.split_thoughts(thoughts)

	        # Build both chains once; they do not change between iterations.
	        generate_query_chain = self.get_generate_query_chain()
	        revise_answer_chain = self.get_revise_answer_chain()

	        responses = []
	        queries = []
	        contexts = []

	        # Every round revises the full running answer, not just one paragraph.
	        answer = thoughts
	        for content in splited_thoughts:
	            query = generate_query_chain.invoke(content)
	            queries.append(query)

	            retrieved_info = self.retriever.invoke(query)
	            contexts.append(retrieved_info)

	            # NOTE(review): the raw retriever output is handed to the prompt
	            # as-is (not reduced to page_content) -- confirm this is intended.
	            answer = revise_answer_chain.invoke({
	                "question": question,
	                "answer": answer,
	                "retrieved_info": retrieved_info,
	            })
	            responses.append(answer)

	        return {
	            "question": question,
	            "splited_thoughts": splited_thoughts,
	            "queries": queries,
	            "context": contexts,
	            "responses": responses,
	            "answer": answer,
	        }

	    def invoke(self, question: str):
	        """Generate initial thoughts for *question*, then revise them.

	        Returns the dict produced by ``iteratively_improve_thoughts``.
	        """
	        thoughts = self.get_initial_thought_chain().invoke(question)
	        return self.iteratively_improve_thoughts(question, thoughts)

	    def retrival_augmented_thoughts(self, question: str):
	        """Alias of ``invoke``; kept under its original name for callers."""
	        return self.invoke(question)

	# retrival_augmented_regneration(
	# question: str,
	# subquestion_chain: LLMChain,
	# loop_function:Callable,
	# retriever: BaseRetriever,
	# ) -> dict:
	# response = subquestion_chain.invoke(question)
	# return loop_function(question, response, retriever)