"""Conversational retrieval-augmented question answering over documents.

Builds a LangChain RAG pipeline (Chroma vector store + history-aware
retriever + stuff-documents chain) with per-session chat history.
"""
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import (
    create_history_aware_retriever,
)
from langchain.chains.retrieval import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
class ConversationalQA:
    """
    Conversational question answering over a set of documents using a
    retrieval-augmented generation (RAG) pipeline with per-session history.

    Pipeline: documents are chunked and embedded into a Chroma vector
    store; a history-aware retriever rewrites follow-up questions into
    standalone ones before retrieval; a stuff-documents chain answers
    from the retrieved context. Chat history is kept per ``session_id``.
    """

    def __init__(
        self,
        docs: list,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        collection_name: str = "youtube",
    ):
        """
        Build the retrieval pipeline from *docs*.

        The OpenAI API key is read from the environment
        (``OPENAI_API_KEY``) by ``ChatOpenAI`` / ``OpenAIEmbeddings``;
        it is not passed explicitly.

        :param docs: Documents to index for retrieval and answering.
        :param chunk_size: Maximum size of each text chunk for processing.
        :param chunk_overlap: Number of characters to overlap between chunks.
        :param collection_name: Chroma collection to store embeddings in
            (default ``"youtube"`` preserves the previous hard-coded value).
        """
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        self.splits = self.text_splitter.split_documents(docs)
        self.llm = ChatOpenAI()
        self.vectorstore = Chroma.from_documents(
            documents=self.splits,
            embedding=OpenAIEmbeddings(),
            collection_name=collection_name,
        )
        self.retriever = self.vectorstore.as_retriever()
        # Answering prompt; {context} is filled with the retrieved documents.
        # Written as adjacent string literals so no stray indentation/newlines
        # from the source file leak into the prompt text.
        self.qa_system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer the "
            "question. If you don't know the answer, just say that you "
            "don't know. Use three sentences maximum and keep the answer "
            "concise.\n\n{context}"
        )
        self.qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.qa_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        # Question-rewriting prompt: turns a history-dependent follow-up
        # into a standalone question; it must not answer the question.
        self.contextualize_q_system_prompt = (
            "Given a chat history and the latest user question which might "
            "reference context in the chat history, formulate a standalone "
            "question which can be understood without the chat history. "
            "Do NOT answer the question, just reformulate it if needed and "
            "otherwise return it as is."
        )
        self.contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        self.question_answer_chain = create_stuff_documents_chain(
            self.llm, self.qa_prompt
        )
        self.history_aware_chain = create_history_aware_retriever(
            self.llm, self.retriever, self.contextualize_q_prompt
        )
        self.rag_chain = create_retrieval_chain(
            self.history_aware_chain, self.question_answer_chain
        )
        # Per-session chat histories, keyed by session_id.
        self.store = {}
        # Built once here instead of on every invoke_chain() call — the
        # wrapper is invariant; get_session_history is looked up lazily
        # at invoke time, so self.store is consulted per call as before.
        self.conversational_rag_chain = RunnableWithMessageHistory(
            self.rag_chain,
            self.get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )

    def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """
        Retrieve or create a chat history for a given session ID.

        :param session_id: Unique session identifier.
        :return: ChatMessageHistory object for the session.
        """
        if session_id not in self.store:
            self.store[session_id] = ChatMessageHistory()
        return self.store[session_id]

    def invoke_chain(self, session_id: str, user_input: str) -> str:
        """
        Invoke the conversational QA chain with user input and session
        history.

        :param session_id: Unique session identifier.
        :param user_input: User's question input.
        :return: Answer generated by the system.
        """
        return self.conversational_rag_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": session_id}},
        )["answer"]