Leopat's picture
upload src files
3b4f6eb verified
raw
history blame
7.64 kB
"""
Python package defining the different chains that will be tested in the thesis
"""
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.retrievers import BaseRetriever
from langchain_core.language_models.llms import BaseLLM
from langchain_core.output_parsers.base import BaseOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
# System instructions for book question answering. The trailing "{context}"
# placeholder is a template variable meant to receive the retrieved extracts.
BASE_SYSTEM_PROMPT: str = (
    "You are an assistant for question-answering tasks over books. "
    "Only use the following book extracts to answer the question. "
    "If you don't know the answer, say that you don't know. "
) + "\n\n{context}"
# Default question-answering prompt: the shared system instructions (which
# carry the "{context}" placeholder) followed by the user's "{question}".
# The system text is taken from BASE_SYSTEM_PROMPT instead of being repeated
# here, so the two definitions cannot drift apart.
BASE_QA_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        ("system", BASE_SYSTEM_PROMPT),
        ("human", "{question}"),
    ]
)
# # define prompt
# BASE_SYSTEM_PROMPT: str = (
# "You are an assistant for question-answering tasks over books. "
# "Only use the following book extracts to answer the question. "
# "If you don't know the answer, say that you don't know. "
# "\n\n"
# "{context}"
# )
def format_docs(docs):
    """Concatenate the ``page_content`` of every document in *docs*.

    Documents are separated by one blank line; an empty iterable yields
    an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)
def build_naive_rag_chain(
    retriever: BaseRetriever,
    llm: BaseLLM,
    retrieval_prompt: ChatPromptTemplate = BASE_QA_PROMPT,
    output_parser: BaseOutputParser = StrOutputParser()
):
    """Assemble a single-pass retrieve-then-answer chain.

    The chain input is handed both to ``retriever`` (stored under
    ``"context"``) and passed through unchanged (stored under
    ``"question"``). An ``"answer"`` entry is then added by formatting the
    retrieved documents with ``format_docs`` and piping the result through
    ``retrieval_prompt``, ``llm`` and ``output_parser``.

    Args:
        retriever: Runnable that produces the documents for ``"context"``.
        llm: Model that receives the rendered prompt.
        retrieval_prompt: Prompt template fed with the ``"context"`` and
            ``"question"`` entries.
        output_parser: Parser applied to the model output.

    Returns:
        A runnable whose output carries ``"context"``, ``"question"`` and
        ``"answer"``.
    """
    # Collapse the retrieved documents into one text block before prompting;
    # only the copy seen by the prompt is flattened.
    stuff_context = RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"])
    )
    answer_chain = stuff_context | retrieval_prompt | llm | output_parser

    # Run retrieval and forward the raw input side by side, then attach the
    # generated answer next to the untouched source documents.
    retrieve_and_forward = RunnableParallel(
        {"context": retriever, "question": RunnablePassthrough()}
    )
    return retrieve_and_forward.assign(answer=answer_chain)
class RATChain:
    """Draft an answer with the LLM, then revise it with retrieved context.

    ``invoke`` first asks the LLM for step-by-step thoughts. The draft is
    split into paragraphs; for each paragraph a retrieval query is
    generated, documents are retrieved, and the LLM is asked to revise the
    running answer using those documents.
    """

    # The delimiter is written as "\\n\\n" so the model reads the literal
    # escape sequence. Unescaped, Python turns it into an empty line and the
    # instruction no longer names any delimiter.
    THOUGHTS_PROMPT_TEMPLATE: str = """
IMPORTANT:
Answer this question with step-by-step thoughts.
Split your different thoughts with \\n\\n to structure your answer into several paragraphs.
Only reply to the question directly.
DO NOT add additional explanations or information that is not related to the question unless you are asked to.
"""

    # Prompt producing the initial draft for "{question}".
    THOUGHTS_PROMPT = ChatPromptTemplate.from_messages([
        ("system", THOUGHTS_PROMPT_TEMPLATE),
        ("user", "{question}")
    ])

    # Prompt turning one paragraph ("{content}") into a retrieval query.
    # NOTE(review): the adjacent fragments are concatenated without
    # separators and contain typos; the wording is kept byte-for-byte so the
    # prompt sent to the model does not change.
    GENERATE_QUERY_PROMPT = ChatPromptTemplate.from_messages([
        ("user",
         "I want to verify the content correctness of the given question. "
         "Summarize the main points of the content and provide a query that I can use "
         "to retrive information from a textbook."
         "Make the query as relevant as possible to the last content."
         "**IMPORTANT**"
         "Just output the query directly. DO NOT add additional explanations or introducement "
         "in the answer unless you are asked to."
         "CONTENT: {content}"
         )
    ])

    # Prompt revising "{answer}" for "{question}" using "{retrieved_info}".
    # The paragraph delimiter is escaped for the same reason as in
    # THOUGHTS_PROMPT_TEMPLATE; the remaining wording is unchanged.
    REVISE_ANSWER_PROMPT = ChatPromptTemplate.from_messages(
        [
            ("user",
             "Verify the answer accoridng ot the retrieved information, "
             "while keeping the initial question in mind. "
             "If you find any mistakes, correct them."
             "If you find any missing information, add them."
             "If you find any irrelevant information, remove them."
             "If you find the answer is correct and does not need improvement, output the original answer."
             "**IMPORTANT**"
             "Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structual for understanding."
             "Add more details from retrieved text to the answer."
             "Split the paragraphs with \\n\\n characters."
             "Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to."
             "INITIAL QUESTION:{question}"
             "ANSWER:{answer}"
             "\n\n"
             "retrieved information={retrieved_info}"
             )
        ])

    @staticmethod
    def split_thoughts(thoughts: str) -> list[str]:
        """Split *thoughts* on double newlines and return the pieces."""
        return thoughts.split("\n\n")

    @staticmethod
    def get_page_content(docs: list[list[Document]]) -> list[list[str]]:
        """Return the ``page_content`` of each document, keeping the nesting."""
        return [[doc.page_content for doc in doc_list] for doc_list in docs]

    def __init__(self, retriever: BaseRetriever, llm: BaseLLM):
        """Store the retriever and the LLM shared by every sub-chain."""
        self.retriever = retriever
        self.llm = llm

    def get_initial_thought_chain(self):
        """Build the chain that drafts the step-by-step thoughts."""
        return self.THOUGHTS_PROMPT | self.llm | StrOutputParser()

    def get_revise_answer_chain(self):
        """Build the chain that revises an answer with retrieved information."""
        return self.REVISE_ANSWER_PROMPT | self.llm | StrOutputParser()

    def get_generate_query_chain(self):
        """Build the chain that turns a paragraph into a retrieval query."""
        return self.GENERATE_QUERY_PROMPT | self.llm | StrOutputParser()

    def iteratively_improve_thoughts(self, question: str, thoughts: str):
        """Revise *thoughts* once per non-blank paragraph using retrieval.

        Args:
            question: The original user question.
            thoughts: The draft answer to refine.

        Returns:
            A dict with the keys ``"question"``, ``"splited_thoughts"``
            (paragraphs that were processed), ``"queries"`` (one per
            paragraph), ``"context"`` (retriever output per query),
            ``"responses"`` (answer after each revision) and ``"answer"``
            (the last revision, or *thoughts* itself when there was no
            paragraph to process).
        """
        # Whitespace-only pieces (trailing newlines, runs of blank lines)
        # carry nothing to verify; skipping them avoids pointless LLM and
        # retriever calls and keeps the output lists aligned.
        splited_thoughts = [
            thought for thought in self.split_thoughts(thoughts) if thought.strip()
        ]
        generate_query_chain = self.get_generate_query_chain()
        revise_answer_chain = self.get_revise_answer_chain()

        responses = []
        queries = []
        contexts = []
        # Each round revises the answer produced by the previous round.
        answer = thoughts
        for content in splited_thoughts:
            query = generate_query_chain.invoke(content)
            queries.append(query)

            # The retriever output is handed to the prompt as returned.
            retrieved_info = self.retriever.invoke(query)
            contexts.append(retrieved_info)

            answer = revise_answer_chain.invoke({
                "question": question,
                "answer": answer,
                "retrieved_info": retrieved_info
            })
            responses.append(answer)

        return {
            "question": question,
            "splited_thoughts": splited_thoughts,
            "queries": queries,
            "context": contexts,
            "responses": responses,
            "answer": answer,
        }

    def invoke(self, question: str):
        """Draft thoughts for *question*, then refine them with retrieval.

        Returns the dict produced by ``iteratively_improve_thoughts``.
        """
        thoughts = self.get_initial_thought_chain().invoke(question)
        return self.iteratively_improve_thoughts(question, thoughts)

    def retrival_augmented_thoughts(self, question: str):
        """Alias of ``invoke``, kept so existing callers keep working."""
        return self.invoke(question)
# retrival_augmented_regneration(
# question: str,
# subquestion_chain: LLMChain,
# loop_function:Callable,
# retriever: BaseRetriever,
# ) -> dict:
# response = subquestion_chain.invoke(question)
# return loop_function(question, response, retriever)