Final_Assignment_Template

Configuration error

App Files Files Community

grshot committed on Jul 2, 2025

Commit

b1dbad7

1 Parent(s): 7528f0e

Add Retriever

Browse files

Files changed (2) hide show

agent.py +197 -121
requirements.txt +3 -1

agent.py CHANGED Viewed

@@ -1,16 +1,28 @@
 import os
 from typing import Dict, List, Sequence, TypedDict, cast
 from dotenv import load_dotenv
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.tools import tool
 from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
 from langchain_tavily import TavilySearch
 from langgraph.graph import END, START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
 from pydantic import BaseModel
 class WebSearchInput(BaseModel):
@@ -25,31 +37,26 @@ class ArxivSearchInput(BaseModel):
     query: str
-@tool(args_schema=WebSearchInput)
-def search_web(query: str) -> Dict[str, str]:
     """Search the web using Tavily and return relevant results."""
-    try:
-        if not os.getenv("TAVILY_API_KEY"):
-            return {
-                "error": "Tavily API key not found. Please set TAVILY_API_KEY environment variable."
-            }
-        search_docs = TavilySearch(max_results=3).invoke({"query": query})
-        if not search_docs:
-            return {"error": "No results found"}
-        formatted_docs = "\n\n---\n\n".join(
-            [
-                f'Source: {doc.metadata["source"]}\n\n{doc.page_content}'
-                for doc in search_docs
-            ]
-        )
-        return {"web_results": formatted_docs}
-    except Exception as e:
-        return {"error": f"Error searching web: {str(e)}"}
-@tool(args_schema=WikipediaSearchInput)
-def search_wikipedia(query: str) -> Dict[str, str]:
     """Search Wikipedia using LangChain's loader and return the first document summary."""
     try:
         loader = WikipediaLoader(query=query, lang="en", load_max_docs=2)
@@ -64,8 +71,8 @@ def search_wikipedia(query: str) -> Dict[str, str]:
         return {"error": f"Error searching Wikipedia: {str(e)}"}
-@tool(args_schema=ArxivSearchInput)
-def arxiv_search(query: str) -> Dict[str, str]:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
@@ -79,84 +86,140 @@ def arxiv_search(query: str) -> Dict[str, str]:
     return {"arxiv_results": formatted_search_docs}
 # System prompt
 system_prompt = SystemMessage(
-    content="""You are a helpful and precise assistant. When answering questions:
-1. First, understand what information you need to answer the question
-2. Then, use the available tools to gather information
-3. If a tool returns an error or no results, try another tool or rephrase your query
-4. Analyze all the information and formulate a clear, concise answer
-When using tools, follow this format exactly:
-Action: tool_name
-Action Input: {"parameter": "value"}
-Available tools:
-- search_wikipedia: Search Wikipedia articles
-  Input: {"query": "your search term"}
-  Returns: {"wiki_results": "results"} or {"error": "error message"}
-  Best for: Historical facts, definitions, general knowledge
-  Error handling: If no results found, try rephrasing or use web search
-- search_web: Search the web for information
-  Input: {"query": "your search term"}
-  Returns: {"web_results": "results"} or {"error": "error message"}
-  Best for: Recent events, current information, diverse sources
-  Error handling: If no results found, try more specific search terms
-- arxiv_search: Search scholarly papers on arXiv
-  Input: {"query": "topic or keywords"}
-  Returns: {"arxiv_results": "paper summaries with title, authors, abstract"} or {"error": "error message"}
-  Best for: Academic research, recent papers in science and technology
-  Error handling: If no results, simplify keywords or broaden the topic
-Tool usage strategy:
-1. For historical/factual queries:
-   - Start with Wikipedia
-   - If no results, try rephrasing the query
-   - If still no results, switch to web search
-2. For recent events/current info:
-   - Start with web search
-   - If no results, try more specific terms
-   - Cross-reference with Wikipedia if needed
-3. For academic/scientific questions:
-   - Use arxiv_search to find recent papers
-   - Summarize key findings, topics, or citations
-   - Cross-check with web or Wikipedia if needed
-4. For complex queries:
-   - Use all tools to gather comprehensive info
-   - Compare and verify information
-   - Note any discrepancies in your answer
-5. Whenall tools fail:
-   - Try different phrasings
-   - Break complex queries into simpler parts
-   - Be transparent about limitations in your answer
-Your final answer must:
-1. Begin with "FINAL ANSWER:"
-2. Be clear and concise
-3. Directly answer the question asked
-4. Include sources if relevant
-5. Admit uncertainty when information is unclear"""
 )
-# Initialize tools
-tools = [search_wikipedia, search_web, arxiv_search]
-def build_agent_graph(provider: str = "gemini"):
     """Build the graph"""
     # Initialize LLM class
     try:
         gemini_api_key = os.getenv("GEMINI_API_KEY")
-        if provider == "gemini":
             chat_model = ChatGoogleGenerativeAI(
                 model="gemini-2.5-pro",
                 temperature=1.0,
@@ -165,11 +228,7 @@ def build_agent_graph(provider: str = "gemini"):
             )
         elif provider == "huggingface":
             llm = HuggingFaceEndpoint(
-                repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-                task="text-generation",
-                max_new_tokens=1024,
-                do_sample=False,
-                repetition_penalty=1.03,
                 temperature=0,
             )
             chat_model = ChatHuggingFace(llm=llm, verbose=True)
@@ -181,32 +240,47 @@ def build_agent_graph(provider: str = "gemini"):
     llm_with_tools = chat_model.bind_tools(tools)
     # Create nodes
-    def assistant(state: MessagesState) -> Dict[str, List[AIMessage]]:
         """Assistant node"""
-        try:
-            # Get last message
-            messages = state.get("messages", [])
-            if not messages:
-                return {"messages": [AIMessage(content="Error: No messages found")]}
-            # Run LLM and ensure AIMessage response
-            response = llm_with_tools.invoke(messages)
-            if isinstance(response, AIMessage):
-                return {"messages": [response]}
-            return {"messages": [AIMessage(content=str(response))]}
-        except Exception as e:
-            return {"messages": [AIMessage(content=f"Error: {str(e)}")]}
     # Build graph
     builder = StateGraph(MessagesState)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.set_entry_point("assistant")
-    builder.add_conditional_edges("assistant", tools_condition)
-    builder.add_edge("tools", "assistant")
-    builder.add_edge("assistant", END)
     return builder.compile()
@@ -214,8 +288,6 @@ def build_agent_graph(provider: str = "gemini"):
 # Manual test function
 def test_agent():
     """Run a manual test of the agent"""
-    # Load environment variables from .env file
-    load_dotenv()
     print("\n" + "=" * 50)
     print("Starting Agent Test")
     print("=" * 50)
@@ -230,16 +302,19 @@ def test_agent():
     if not os.getenv("TAVILY_API_KEY"):
         print("\nWarning: TAVILY_API_KEY not set - web search will be unavailable")
     print("\nInitializing agent...")
     try:
-        graph = build_agent_graph(provider="gemini")
         print("Agent initialized successfully")
     except Exception as e:
         print(f"Failed to initialize agent: {str(e)}")
         return
     # Test a single question
-    question = "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?"
     print("\nTesting question:", question)
     print("-" * 50)
@@ -253,6 +328,7 @@ def test_agent():
         # Get answer
         if result and "messages" in result and result["messages"]:
             answer = result["messages"][-1].content
             print("\nResponse received:")
             print("-" * 20)

+import cmath
 import os
 from typing import Dict, List, Sequence, TypedDict, cast
 from dotenv import load_dotenv
+from langchain.tools.retriever import create_retriever_tool
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
+from langchain_community.vectorstores import SupabaseVectorStore
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.tools import tool
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_groq import ChatGroq
+from langchain_huggingface import (
+    ChatHuggingFace,
+    HuggingFaceEmbeddings,
+    HuggingFaceEndpoint,
+)
 from langchain_tavily import TavilySearch
 from langgraph.graph import END, START, MessagesState, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
 from pydantic import BaseModel
+from supabase.client import Client, create_client
+# Load environment variables from .env file
+load_dotenv()
 class WebSearchInput(BaseModel):
     query: str
+@tool
+def search_web(query: str) -> str:
     """Search the web using Tavily and return relevant results."""
+    """Search Tavily for a query and return maximum 3 results.
+    Args:
+        query: The search query."""
+    search_docs = TavilySearch(max_results=3).invoke({"query": query})
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+            for doc in search_docs
+        ]
+    )
+    return {"web_results": formatted_search_docs}
+@tool
+def search_wikipedia(query: str) -> str:
     """Search Wikipedia using LangChain's loader and return the first document summary."""
     try:
         loader = WikipediaLoader(query=query, lang="en", load_max_docs=2)
         return {"error": f"Error searching Wikipedia: {str(e)}"}
+@tool
+def arxiv_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.
     Args:
         query: The search query."""
     return {"arxiv_results": formatted_search_docs}
+@tool
+def power(a: float, b: float) -> float:
+    """
+    Get the power of two numbers.
+    Args:
+        a (float): the first number
+        b (float): the second number
+    """
+    return a**b
+@tool
+def square_root(a: float) -> float | complex:
+    """
+    Get the square root of a number.
+    Args:
+        a (float): the number to get the square root of
+    """
+    if a >= 0:
+        return a**0.5
+    return cmath.sqrt(a)
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a * b
+@tool
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a + b
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a - b
+@tool
+def divide(a: float, b: float) -> float:
+    """
+    Divides two numbers.
+    Args:
+        a (float): the first float number
+        b (float): the second float number
+    """
+    if b == 0:
+        raise ValueError("Cannot divided by zero.")
+    return a / b
+@tool
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    return a % b
 # System prompt
 system_prompt = SystemMessage(
+    content="""You are a helpful assistant tasked with answering questions using a set of tools.
+Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, Apply the rules above for each element (number or string), ensure there is exactly one space after each comma.
+Your answer should only start with "FINAL ANSWER: ", then follows with the answer. """
 )
+supabase_url = os.environ.get("SUPABASE_URL")
+supabase_service_key = os.environ.get("SUPABASE_SERVICE_KEY")
+# build a retriever
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-mpnet-base-v2"
+)  #  dim=768
+supabase: Client = create_client(supabase_url, supabase_service_key)
+vector_store = SupabaseVectorStore(
+    client=supabase,
+    embedding=embeddings,
+    table_name="documents",
+    query_name="match_documents_langchain",
+)
+create_retriever_tool = create_retriever_tool(
+    retriever=vector_store.as_retriever(),
+    name="Question Search",
+    description="A tool to retrieve similar questions from a vector store.",
+)
+# Initialize tools
+tools = [
+    search_wikipedia,
+    search_web,
+    arxiv_search,
+    power,
+    square_root,
+    multiply,
+    divide,
+    subtract,
+    add,
+    modulus,
+]
+def build_agent_graph(provider: str = "groq"):
     """Build the graph"""
     # Initialize LLM class
     try:
         gemini_api_key = os.getenv("GEMINI_API_KEY")
+        if provider == "groq":
+            # Groq https://console.groq.com/docs/models
+            chat_model = ChatGroq(
+                model="qwen-qwq-32b", temperature=0
+            )  # optional : qwen-qwq-32b gemma2-9b-it
+        elif provider == "gemini":
             chat_model = ChatGoogleGenerativeAI(
                 model="gemini-2.5-pro",
                 temperature=1.0,
             )
         elif provider == "huggingface":
             llm = HuggingFaceEndpoint(
+                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
                 temperature=0,
             )
             chat_model = ChatHuggingFace(llm=llm, verbose=True)
     llm_with_tools = chat_model.bind_tools(tools)
     # Create nodes
+    def assistant(state: MessagesState):
         """Assistant node"""
+        return {"messages": [llm_with_tools.invoke(state["messages"])]}
+    def retriever(state: MessagesState):
+        query = state["messages"][-1].content
+        results = vector_store.similarity_search(query, k=1)
+        if not results:
+            print(f"[retriever] No similar documents found for query: {query}")
+            return {
+                "messages": [
+                    AIMessage(content="I couldn't find any similar content in memory.")
+                ]
+            }
+        similar_doc = results[0]
+        content = similar_doc.page_content
+        if "Final answer :" in content:
+            answer = content.split("Final answer :")[-1].strip()
+        else:
+            answer = content.strip()
+        return {"messages": [AIMessage(content=answer)]}
     # Build graph
     builder = StateGraph(MessagesState)
+    builder.add_node("retriever", retriever)
+    # builder.add_node("assistant", assistant)
+    # builder.add_node("tools", ToolNode(tools))
+    # builder.add_edge(START, "retriever")
+    # builder.add_edge("retriever", "assistant")
+    # builder.add_conditional_edges(
+    #     "assistant",
+    #     tools_condition,
+    # )
+    # builder.add_edge("tools", "assistant")
+    builder.set_entry_point("retriever")
+    builder.set_finish_point("retriever")
     return builder.compile()
 # Manual test function
 def test_agent():
     """Run a manual test of the agent"""
     print("\n" + "=" * 50)
     print("Starting Agent Test")
     print("=" * 50)
     if not os.getenv("TAVILY_API_KEY"):
         print("\nWarning: TAVILY_API_KEY not set - web search will be unavailable")
+    if not os.getenv("SUPABASE_URL"):
+        print("\nWarning: SUPABASE_URL not set - web search will be unavailable")
     print("\nInitializing agent...")
     try:
+        graph = build_agent_graph(provider="groq")
         print("Agent initialized successfully")
     except Exception as e:
         print(f"Failed to initialize agent: {str(e)}")
         return
     # Test a single question
+    question = "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\""
     print("\nTesting question:", question)
     print("-" * 50)
         # Get answer
         if result and "messages" in result and result["messages"]:
             answer = result["messages"][-1].content
             print("\nResponse received:")
             print("-" * 20)

requirements.txt CHANGED Viewed

@@ -14,4 +14,6 @@ pytube>=15.0.0
 langchain_huggingface
 langchain-google-genai
 pymupdf
-arxiv

 langchain_huggingface
 langchain-google-genai
 pymupdf
+arxiv
+supabase
+pgvector