In [1]:
from langchain_community.vectorstores import Qdrant

In [2]:
from qdrant_client import QdrantClient, models

# Open the Qdrant store found at the relative path below.
QDRANT_PATH = "Qdrant_db"
client = QdrantClient(path=QDRANT_PATH)

In [3]:
from langchain_openai.embeddings import OpenAIEmbeddings

# OpenAI embedding model; this object is what gets handed to the Qdrant
# vector-store wrapper in a later cell.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
embedding_model = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [7]:
# Bind the LangChain vector-store wrapper to one named collection of the
# already-open Qdrant client, together with the embedding model.
collection_name = "Meta info 400"
qdrant = Qdrant(
    client=client,
    collection_name=collection_name,
    embeddings=embedding_model,
)

In [8]:
# Expose the vector store as a retriever. No arguments are passed, so whatever
# defaults as_retriever() applies are in effect.
qdrant_retriever = qdrant.as_retriever()

In [9]:
# Sanity-check the plain retriever with a single free-text query; the cell
# output is the list of retrieved Document objects.
# NOTE(review): the query text is ungrammatical ("said" -> "say"). It is left
# unchanged because it is the actual search input and editing it would alter
# what gets retrieved.
query = "What did the meta president said"
qdrant_retriever.invoke(query)

[Document(page_content='these risks and uncertainties, readers are cautioned not to place undue reliance on such forward‑looking statements.\nUnless expressly indicated or the context requires otherwise, the terms "Meta," "company," "we," "us," and "our" in this document refer to Meta', metadata={'source': 'Data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf', 'file_path': 'Data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf', 'page': 3, 'total_pages': 147, 'format': 'PDF 1.4', 'title': '0001326801-24-000012', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-K filed on 2024-02-02 for the period ending 2023-12-31', 'keywords': '0001326801-24-000012; ; 10-K', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': "D:20240202060356-05'00'", 'modDate': "D:20240202060413-05'00'", 'trapped': '', 'encryption': 'Standard V2 R3 128-bit RC4', '_id': '77ee8ea68d4940c7a649688a4b4dc095', '_collection_name': 'Meta info 40

In [10]:
from langchain_openai import ChatOpenAI

# Chat model shared by the answer-generation step and by MultiQueryRetriever
# (which uses it to produce query variants).
# temperature=0 asks for the least-random sampling so that both the generated
# query variants and the final answers are as repeatable as possible across
# "Restart Kernel -> Run All" executions.
openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [11]:
from langchain_core.prompts import ChatPromptTemplate

In [126]:
# Persona text for the assistant, assembled from adjacent string literals; the
# resulting value is unchanged.
# NOTE(review): nothing in the visible cells feeds this into RAG_PROMPT, and
# "assistante" looks like a typo -- confirm before wiring it into a prompt.
system_message = (
    "You are a helpful assistante, experienced lawyer "
    "and an expert reading SECURITIES AND EXCHANGE COMMISSION documents"
)

In [19]:
# Prompt layout: retrieved context first, then the user's question, then the
# answering instruction with an explicit "I don't know" fallback.
# NOTE(review): "responde" is a typo for "respond"; the text is reproduced
# exactly as-is here because it is sent to the model verbatim.
RAG_PROMPT = (
    "\n"
    "\n"
    "CONTEXT:\n"
    "{context}\n"
    "\n"
    "QUERY:\n"
    "{question}\n"
    "\n"
    "Answer the query above using the context provided. "
    "If you don't know the answer responde with: I don't know\n"
)

# Chat prompt built from the template above; it is filled from the "context"
# and "question" values supplied at invocation time.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [13]:
from langchain.retrievers import MultiQueryRetriever

# Wrap the plain Qdrant retriever in a MultiQueryRetriever driven by the chat model.
advanced_retriever = MultiQueryRetriever.from_llm(
    llm=openai_chat_model,
    retriever=qdrant_retriever,
)


In [53]:
# Display the prompt template object.
# NOTE(review): the saved output for this cell lists a 'system_message' input
# variable and a "SYSTEM:" section that the RAG_PROMPT defined in this notebook
# does not contain -- it looks like stale output from an earlier version of the
# prompt. Re-run the notebook top to bottom to refresh it.
rag_prompt

ChatPromptTemplate(input_variables=['context', 'question', 'system_message'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question', 'system_message'], template="\n\nSYSTEM:\n{system_message}\n\nCONTEXT:\n{context}\n\nQUERY:\n{question}\n\nAnswer the query above only using the context provided. If you don't know the answer responde with: I don't know\n"))])

In [20]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# INVOKE CHAIN WITH: {"question": "<<SOME USER QUESTION>>"}
# RETURNS:           {"response": <chat model message>, "context": <retrieved documents>}
#
# The former middle step, RunnablePassthrough.assign(context=itemgetter("context")),
# only re-assigned the "context" key to its own value, so it has been dropped;
# the data flowing through the chain is unchanged.
retrieval_augmented_qa_chain = (
    # Step 1 - build the prompt inputs from the incoming dict:
    #   "context"  : the "question" value piped into advanced_retriever
    #   "question" : the "question" value passed through as-is
    {"context": itemgetter("question") | advanced_retriever, "question": itemgetter("question")}
    # Step 2 - build the final output dict from the step-1 dict:
    #   "response" : "context" and "question" format rag_prompt, which is piped into the chat model
    #   "context"  : the retrieved documents, forwarded so callers can inspect the sources
    | {"response": rag_prompt | openai_chat_model, "context": itemgetter("context")}
)


"What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"

"Who are Meta's 'Directors' (i.e., members of the Board of Directors)?"

In [103]:
# Smoke test of prompt -> chat model with placeholder inputs (no retrieval).
# The "system_message" key is not a placeholder of the RAG_PROMPT defined in
# this notebook.
retr = rag_prompt | openai_chat_model
resp = retr.invoke(
    {
        "question": "What was the total value ",
        "context": "sdfsfsdfs",
        "system_message": "dfgdfg",
    }
)
resp

AIMessage(content="I don't know", response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 49, 'total_tokens': 53}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-6ad6e2e1-23e4-4aa7-a17c-12beea722db8-0')

In [21]:
# Send one question through the full retrieval + generation chain, then show
# only the text content of the model's reply.
response = retrieval_augmented_qa_chain.invoke(
    {"question": "Who is the Board Chair and Chief Executive Officer?"}
)
response["response"].content

'Mark Zuckerberg is the Board Chair and Chief Executive Officer.'

In [148]:
# Show the whole chain result: the model message under "response" and the
# retrieved documents under "context".
response

{'response': AIMessage(content='Mark Zuckerberg', response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 1705, 'total_tokens': 1707}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-7a0deb8c-4266-486b-964d-1b55397557f1-0'),
 'context': [Document(page_content='/s/ MARK ZUCKERBERG\nMark Zuckerberg\nBoard Chair and Chief Executive Officer\n(Principal Executive Officer)', metadata={'source': 'Data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf', 'file_path': 'Data/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf', 'page': 139, 'total_pages': 147, 'format': 'PDF 1.4', 'title': '0001326801-24-000012', 'author': 'EDGAR® Online LLC, a subsidiary of OTC Markets Group', 'subject': 'Form 10-K filed on 2024-02-02 for the period ending 2023-12-31', 'keywords': '0001326801-24-000012; ; 10-K', 'creator': 'EDGAR Filing HTML Converter', 'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creationDate': "D:20240202060356-05