from langchain_core.tools import tool
import pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
from dotenv import load_dotenv

load_dotenv()
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
PINECONE_API = os.getenv("PINECONE_API_KEY")

# Embedding model used to vectorize incoming queries.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

# Pinecone client and the index holding the pre-embedded PDF chunks.
pc = pinecone.Pinecone(api_key=PINECONE_API)
PINECONE_INDEX = "rites-pdf"
index = pc.Index(PINECONE_INDEX)


@tool
def get_context(query: str) -> str:
    """
    Retrieve context by performing a semantic search over indexed document chunks.

    This tool embeds the user query with a Google Generative AI embeddings model,
    then queries a Pinecone index for the top 20 matching document chunks. Each
    match carries metadata: the text chunk, its start and end pages, and the
    source PDF URL. The results are aggregated into a single formatted string.

    Args:
        query (str): The user query used for semantic matching against the
            document index.

    Returns:
        str: A formatted string containing the matched document chunks along
            with their start page, end page, and PDF URL.
    """
    embedding = google_embeddings.embed_query(query)
    search_results = index.query(
        vector=embedding,
        top_k=20,  # Retrieve the top 20 results
        include_metadata=True,
    )

    context = ""
    count = 1
    for match in search_results["matches"]:
        chunk = match["metadata"].get("chunk")
        url = match["metadata"].get("pdf_url")
        start_page = match["metadata"].get("start_page")
        end_page = match["metadata"].get("end_page")
        context += f"""
Chunk {count}:
{chunk}
start_page: {start_page}
end_page: {end_page}
pdf_url: {url}
#########################################
"""
        count += 1
    return context
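

# --- Usage sketch (illustrative only) ---
# A minimal example of calling the tool directly through LangChain's Runnable
# interface. The sample query below is hypothetical; in the actual agent setup
# the tool would typically be passed to the chat model via bind_tools() rather
# than invoked by hand.
if __name__ == "__main__":
    sample_context = get_context.invoke(
        {"query": "What does the document say about inspection requirements?"}  # hypothetical query
    )
    print(sample_context[:500])  # Preview the first part of the aggregated context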