import os

from dotenv import load_dotenv
from bs4.filter import SoupStrainer
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.load import dumps, loads
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

from helper import get_retriever

load_dotenv()


def get_unique_union(documents: list[list]) -> list:
    """Return the deduplicated union of several retrieval result lists.

    Each Document is serialized with ``dumps`` so it becomes hashable,
    deduplicated via a set, then deserialized back with ``loads``.
    NOTE: ``set`` does not preserve the original retrieval order.

    Args:
        documents: A list of per-query retrieval results (list of lists
            of Documents).

    Returns:
        A flat list of unique Documents.
    """
    # Flatten list of lists, and convert each Document to a string key.
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Deduplicate on the serialized form.
    unique_docs = list(set(flattened_docs))
    # Deserialize back into Document objects.
    return [loads(doc) for doc in unique_docs]


def get_answer_using_query_expansion(link: str, question: str) -> str:
    """Answer *question* with RAG over *link* using multi-query expansion.

    Generates five LLM-rewritten variants of the question, retrieves
    documents for each variant, takes the unique union of the results,
    and answers from that combined context.

    Args:
        link: URL whose content backs the retriever (see ``get_retriever``).
        question: The user's original question.

    Returns:
        The model's answer as a plain string.
    """
    retriever = get_retriever(link)

    # Prompt that asks the LLM for five alternative phrasings of the
    # question, one per line.  (Runtime string preserved verbatim.)
    template = """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines. 
Original question: {question}"""
    prompt_perspectives = ChatPromptTemplate.from_template(template)

    # Single deterministic LLM instance, reused for both expansion and
    # final answering (the original built two identical instances).
    llm = ChatOpenAI(temperature=0)

    generate_queries = (
        prompt_perspectives
        | llm
        | StrOutputParser()
        # Split into one query per line; drop blank lines so empty
        # strings are never sent to the retriever.
        | (lambda x: [q for q in x.split("\n") if q.strip()])
    )

    # Fan out: run the retriever over every generated query, then merge.
    retrieval_chain = generate_queries | retriever.map() | get_unique_union

    template = """Answer the following question based on this context: {context} Question: {question} """
    prompt = ChatPromptTemplate.from_template(template)

    final_rag_chain = (
        {
            "context": retrieval_chain,
            "question": itemgetter("question"),
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    return final_rag_chain.invoke({"question": question})


if __name__ == "__main__":
    # Example usage (the original commented-out block called a
    # non-existent ``get_answer``; corrected here).
    question = "What is task decomposition for LLM agents?"
    link = "https://lilianweng.github.io/posts/2023-06-23-agent/"
    answer = get_answer_using_query_expansion(link, question)
    print(answer)