import os
from langchain_community.document_loaders import PyMuPDFLoader
import faiss
from langchain_groq import ChatGroq
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from sentence_transformers import SentenceTransformer
import dotenv
dotenv.load_dotenv()
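
# The Groq LLM and the Tavily search tool both read their API keys from the environment;
# a .env file providing GROQ_API_KEY (and TAVILY_API_KEY when search is enabled) is
# assumed to sit alongside this script.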
# LLM factory and globally shared tools

def model_selection(model_name):
    llm = ChatGroq(model=model_name, api_key=os.getenv("GROQ_API_KEY"))
    return llm
    
tools = [TavilySearchResults(max_results=5)]

# Initialize memory for conversation history
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

def estimate_tokens(text):
    """Estimate the number of tokens in a text (rough approximation)."""
    return len(text) // 4

def process_pdf_file(file_path):
    """Load a PDF file and extract its text with metadata."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")
    loader = PyMuPDFLoader(file_path)
    documents = loader.load()
    return documents  # Return list of Document objects with metadata

def chunk_text(documents, max_length=1000):
    """Split documents into chunks with metadata."""
    chunks = []
    for doc in documents:
        text = doc.page_content
        metadata = doc.metadata
        paragraphs = text.split("\n\n")
        current_chunk = ""
        current_metadata = metadata.copy()
        for paragraph in paragraphs:
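            # estimate_tokens() already divides by 4, so comparing against max_length // 4
            # effectively caps each chunk at roughly max_length characters.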
            if estimate_tokens(current_chunk + paragraph) <= max_length // 4:
                current_chunk += paragraph + "\n\n"
            else:
                # Only flush a non-empty running chunk so empty chunks are never indexed
                # (e.g. when a single oversized paragraph starts a new page).
                if current_chunk.strip():
                    chunks.append({"text": current_chunk.strip(), "metadata": current_metadata})
                current_chunk = paragraph + "\n\n"
        if current_chunk:
            chunks.append({"text": current_chunk.strip(), "metadata": current_metadata})
    return chunks

def create_embeddings(chunks, model):
    """Create embeddings for a list of chunk texts."""
    texts = [chunk["text"] for chunk in chunks]
    embeddings = model.encode(texts, show_progress_bar=True, convert_to_tensor=True)
    return embeddings.cpu().numpy(), chunks

def build_faiss_index(embeddings):
    """Build a FAISS HNSW index from embeddings for similarity search."""
    dim = embeddings.shape[1]
    index = faiss.IndexHNSWFlat(dim, 32)  # 32 = number of neighbors in HNSW graph
    index.hnsw.efConstruction = 200  # Higher = better quality, slower build
    index.hnsw.efSearch = 50  # Higher = better accuracy, slower search
    index.add(embeddings)
    return index
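
# Note: IndexHNSWFlat uses L2 distance by default; if cosine similarity is preferred
# for these sentence-transformer embeddings, the vectors could be normalized first
# (e.g. with faiss.normalize_L2) before indexing and before each query.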

def retrieve_similar_chunks(query, index, chunks, model, k=10, max_chunk_length=1000):
    """Retrieve top k similar chunks to the query from the FAISS index."""
    query_embedding = model.encode([query], convert_to_tensor=True).cpu().numpy()
    distances, indices = index.search(query_embedding, k)
    return [(chunks[i]["text"][:max_chunk_length], distances[0][j], chunks[i]["metadata"]) for j, i in enumerate(indices[0])]

def agentic_rag(llm, tools, query, context_chunks, Use_Tavily=False):
    """Answer a query with the agent, grounded in the retrieved (text, distance, metadata) chunks."""
    # Sort chunks by relevance (lower distance = more relevant)
    context_chunks = sorted(context_chunks, key=lambda x: x[1])  # Sort by distance
    context = ""
    total_tokens = 0
    max_tokens = 7000  # Leave room for prompt and response
    
    # Aggregate relevant chunks until token limit is reached
    for chunk, _, _ in context_chunks:  # Unpack three elements
        chunk_tokens = estimate_tokens(chunk)
        if total_tokens + chunk_tokens <= max_tokens:
            context += chunk + "\n\n"
            total_tokens += chunk_tokens
        else:
            break
    
    # Define prompt template
    search_instructions = (
        "Use the search tool if the context is insufficient to answer the question or you are unsure. Give source links if you use the search tool."
        if Use_Tavily
        else "Use the context provided to answer the question."
    )
    
    prompt = ChatPromptTemplate.from_messages([
        ("system", """
        You are a helpful assistant. {search_instructions}
        Instructions:
        1. Use the provided context to answer the user's question.
        2. Provide a clear answer; if you don't know the answer, say 'I don't know'.
        3. Prioritize information from the most relevant context chunks.
        """),
        ("human", "Context: {context}\n\nQuestion: {input}"),
        MessagesPlaceholder(variable_name="chat_history"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])
    
    agent_tools = tools if Use_Tavily else []
    try:
        agent = create_tool_calling_agent(llm, agent_tools, prompt)
        agent_executor = AgentExecutor(agent=agent, tools=agent_tools, memory=memory, verbose=True)
        return agent_executor.invoke({
            "input": query,
            "context": context,
            "search_instructions": search_instructions
        })
    except Exception as e:
        print(f"Error during agent execution: {str(e)}")
        fallback_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful assistant. Use the provided context to answer the user's question."),
            ("human", "Context: {context}\n\nQuestion: {input}")
        ])
        response = llm.invoke(fallback_prompt.format_messages(context=context, input=query))
        return {"output": response.content} 

if __name__ == "__main__":
    # Process PDF and prepare index
    dotenv.load_dotenv()
    pdf_file = "JatinCV.pdf"
    llm = model_selection("meta-llama/llama-4-scout-17b-16e-instruct")
    texts = process_pdf_file(pdf_file)
    chunks = chunk_text(texts, max_length=1500)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings, chunks = create_embeddings(chunks, model)
    index = build_faiss_index(embeddings)
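
    # Optional: the FAISS index could be persisted and reloaded between runs
    # (sketch, assuming write access to the working directory; filename is illustrative):
    # faiss.write_index(index, "resume_index.faiss")
    # index = faiss.read_index("resume_index.faiss")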

    # Chat loop
    print("Chat with the assistant (type 'exit' or 'quit' to stop):")
    while True:
        query = input("User: ")
        if query.lower() in ["exit", "quit"]:
            break
        
        # Retrieve similar chunks (each is a (text, distance, metadata) tuple)
        similar_chunks = retrieve_similar_chunks(query, index, chunks, model, k=3)

        # Generate response from the retrieved chunks
        response = agentic_rag(llm, tools, query=query, context_chunks=similar_chunks, Use_Tavily=True)
        print("Assistant:", response["output"])