""" Python package defining the different chains that will be tested in the thesis """ from langchain_openai import ChatOpenAI from langchain_openai import OpenAIEmbeddings from langchain_core.prompts import ChatPromptTemplate from langchain_chroma import Chroma from langchain.chains import create_retrieval_chain from langchain.chains.combine_documents import create_stuff_documents_chain from langchain_core.retrievers import BaseRetriever from langchain_core.language_models.llms import BaseLLM from langchain_core.output_parsers.base import BaseOutputParser from langchain_core.runnables import RunnablePassthrough, RunnableParallel from langchain_core.output_parsers import StrOutputParser from langchain_core.documents import Document # define prompt BASE_SYSTEM_PROMPT: str = ( "You are an assistant for question-answering tasks over books. " "Only use the following book extracts to answer the question. " "If you don't know the answer, say that you don't know. " "\n\n" "{context}" ) BASE_QA_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages( [ ("system", "You are an assistant for question-answering tasks over books. " "Only use the following book extracts to answer the question. " "If you don't know the answer, say that you don't know. " "\n\n" "{context}"), ("human", "{question}"), ] ) # # define prompt # BASE_SYSTEM_PROMPT: str = ( # "You are an assistant for question-answering tasks over books. " # "Only use the following book extracts to answer the question. " # "If you don't know the answer, say that you don't know. " # "\n\n" # "{context}" # ) def format_docs(docs): return "\n\n".join(doc.page_content for doc in docs) def build_naive_rag_chain( retriever: BaseRetriever, llm: BaseLLM, retrieval_prompt: ChatPromptTemplate = BASE_QA_PROMPT, output_parser: BaseOutputParser = StrOutputParser() ): rag_chain_from_docs = ( RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"]))) | retrieval_prompt | llm | output_parser ) rag_chain_with_source = RunnableParallel( {"context": retriever, "question": RunnablePassthrough()} ).assign(answer=rag_chain_from_docs) return rag_chain_with_source class RATChain: THOUGHTS_PROMPT_TEMPLATE: str = """ IMPORTANT: Answer this question with step-by-step thoughts. Split your different thoughts with \n\n to structure your answer into several paragraphs. Only reply to the question directly. DO NOT add additional explanations or information that is not related to the question unless you are asked to. """ THOUGHTS_PROMPT = ChatPromptTemplate.from_messages([ ("system", THOUGHTS_PROMPT_TEMPLATE), ("user", "{question}") ]) GENERATE_QUERY_PROMPT = ChatPromptTemplate.from_messages([ ("user", "I want to verify the content correctness of the given question. " "Summarize the main points of the content and provide a query that I can use " "to retrive information from a textbook." "Make the query as relevant as possible to the last content." "**IMPORTANT**" "Just output the query directly. DO NOT add additional explanations or introducement " "in the answer unless you are asked to." "CONTENT: {content}" ) ]) REVISE_ANSWER_PROMPT = ChatPromptTemplate.from_messages( [ ("user", "Verify the answer accoridng ot the retrieved information, " "while keeping the initial question in mind. " "If you find any mistakes, correct them." "If you find any missing information, add them." "If you find any irrelevant information, remove them." "If you find the answer is correct and does not need improvement, output the original answer." "**IMPORTANT**" "Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structual for understanding." "Add more details from retrieved text to the answer." "Split the paragraphs with \n\n characters." "Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to." "INITIAL QUESTION:{question}" "ANSWER:{answer}" "\n\n" "retrieved information={retrieved_info}" ) ]) @staticmethod def split_thoughts(thoughts: str) -> list[str]: return thoughts.split("\n\n") @staticmethod def get_page_content(docs: list[list[Document]]): docs_page_content = [] for doc_list in docs: docs_page_content.append([doc.page_content for doc in doc_list]) return docs_page_content def __init__(self, retriever: BaseRetriever, llm: BaseLLM): self.retriever = retriever self.llm = llm def get_initial_thought_chain(self): return self.THOUGHTS_PROMPT | self.llm | StrOutputParser() def get_revise_answer_chain(self): return self.REVISE_ANSWER_PROMPT | self.llm | StrOutputParser() def get_generate_query_chain(self): return self.GENERATE_QUERY_PROMPT | self.llm | StrOutputParser() def iteratively_improve_thoughts(self, question: str, thoughts: str): splited_thoughts = self.split_thoughts(thoughts) # initial_thought_chain = self.get_initial_thought_chain() generate_query_chain = self.get_generate_query_chain() revise_answer_chain = self.get_revise_answer_chain() responses = [] queries = [] contexts = [] answer = thoughts for i, content in enumerate(splited_thoughts): query = generate_query_chain.invoke(content) queries.append(query) retrieved_info = self.retriever.invoke(query) contexts.append(retrieved_info) answer = revise_answer_chain.invoke({ "question":question, "answer":answer, "retrieved_info":retrieved_info }) responses.append(answer) output = { "question": question, "splited_thoughts":splited_thoughts, "queries": queries, "context": contexts, "responses": responses, "answer": answer, } return output def invoke(self, question: str): initial_thought_chain = self.get_initial_thought_chain() thoughts = initial_thought_chain.invoke(question) response = self.iteratively_improve_thoughts(question, thoughts) return response def retrival_augmented_thoughts(self, question: str): "does exactly the same as invoke" initial_thought_chain = self.get_initial_thought_chain() thoughts = initial_thought_chain.invoke(question) response = self.iteratively_improve_thoughts(question, thoughts) return response # retrival_augmented_regneration( # question: str, # subquestion_chain: LLMChain, # loop_function:Callable, # retriever: BaseRetriever, # ) -> dict: # response = subquestion_chain.invoke(question) # return loop_function(question, response, retriever)