grshot committed on
Commit
c5a9cfd
·
1 Parent(s): 183b832

test basic tools

Browse files
Files changed (2) hide show
  1. agent.py +87 -284
  2. app.py +7 -2
agent.py CHANGED
@@ -1,342 +1,145 @@
1
- import json
2
  import os
3
- from typing import Annotated, Dict, Optional
4
 
5
- import pandas as pd
6
- from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
7
- from langchain_community.document_loaders.youtube import TranscriptFormat
8
  from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
9
- from langchain_core.runnables import RunnableLambda
10
  from langchain_core.tools import tool
11
- from langchain_experimental.tools.python.tool import PythonREPLTool
12
  from langchain_groq import ChatGroq
13
- from langchain_huggingface import (
14
- ChatHuggingFace,
15
- HuggingFaceEmbeddings,
16
- HuggingFaceEndpoint,
17
- )
18
-
19
- # --- Langchain / Langraph ---
20
  from langchain_tavily import TavilySearch
21
  from langgraph.graph import END, START, MessagesState, StateGraph
22
- from langgraph.graph.message import add_messages
23
  from langgraph.prebuilt import ToolNode, tools_condition
24
 
25
 
26
- # Custom exception for tool errors
27
- class ToolExecutionError(Exception):
28
- """Custom exception for tool execution errors"""
29
-
30
- pass
31
-
32
-
33
- @tool("search_web_sources")
34
- def search_web_sources(query: Annotated[str, "Search query string"]) -> Dict[str, str]:
35
- """Performs a web search and returns up to 3 formatted documents with content and source."""
36
  try:
37
- if not os.environ.get("TAVILY_API_KEY"):
38
- raise EnvironmentError(
39
- "TAVILY_API_KEY is not set in environment variables."
40
- )
41
 
42
  search_docs = TavilySearch(max_results=3).invoke({"query": query})
43
  if not search_docs:
44
- return {"web_results": "No results found for the given query."}
45
-
46
- formatted = "\n\n---\n\n".join(
47
  [
48
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
49
  for doc in search_docs
50
  ]
51
  )
52
- return {"web_results": formatted}
53
  except Exception as e:
54
- return {"web_results": f"Error during web search: {str(e)}"}
55
 
56
 
57
  @tool
58
  def search_wikipedia(query: str) -> Dict[str, str]:
59
  """Search Wikipedia using LangChain's loader and return the first document summary."""
60
  try:
61
- # Input validation
62
- if not query or not isinstance(query, str):
63
- return {
64
- "wiki_results": "Invalid query provided. Please provide a valid search term."
65
- }
66
-
67
  loader = WikipediaLoader(query=query, lang="en", load_max_docs=2)
68
  docs = loader.load()
69
-
70
  if not docs:
71
- return {"wiki_results": f"No Wikipedia articles found for query: {query}"}
72
-
73
- formatted_docs = "---".join(
74
- [
75
- f'<WikipediaArticle title="{query}">{doc.page_content}</WikipediaArticle>'
76
- for doc in docs
77
- ]
78
  )
79
  return {"wiki_results": formatted_docs}
80
  except Exception as e:
81
- error_msg = str(e)
82
- if "Page id" in error_msg and "not found" in error_msg:
83
- return {"wiki_results": f"No Wikipedia article found for: {query}"}
84
- return {"wiki_results": f"Error searching Wikipedia: {error_msg}"}
85
 
86
 
87
- @tool
88
- def extract_youtube_transcript(video_url: str) -> dict:
89
- """Extract transcript from a YouTube video given its URL using LangChain's YouTubeLoader."""
90
- try:
91
- loader = YoutubeLoader.from_youtube_url(
92
- video_url,
93
- add_video_info=True,
94
- transcript_format=TranscriptFormat.CHUNKS,
95
- chunk_size_seconds=30,
96
- )
97
- docs = loader.load()
98
- if docs:
99
- formatted_docs = "\n\n---\n\n".join(
100
- [
101
- f'<YouTubeTranscript url="{video_url}">\n{doc.page_content}\n</YouTubeTranscript>'
102
- for doc in docs
103
- ]
104
- )
105
- return {"transcript_results": formatted_docs}
106
- else:
107
- return {"transcript_results": "No transcript found."}
108
- except Exception as e:
109
- return {"transcript_results": f"Error fetching YouTube transcript: {e}"}
110
-
111
-
112
- @tool
113
- def run_python_code(code: str) -> str:
114
- """Execute Python code and return the result.
115
- Args:
116
- code: Python code as a string.
117
- """
118
- repl = PythonREPLTool()
119
- return repl.run(code)
120
-
121
-
122
- # --- System Prompt ---
123
  system_prompt = SystemMessage(
124
- content="""
125
- You are a helpful and precise assistant with access to several tools. You will receive questions and use tools appropriately to find answers.
126
 
127
- When using tools:
128
- 1. Format tool calls correctly using the tool's exact name and required parameters
129
- 2. Validate inputs before making tool calls
130
- 3. Handle tool responses appropriately, checking for errors
131
- 4. If a tool fails, try an alternative approach or provide a clear error message
132
-
133
- Available tools:
134
- - search_web_sources: Search web for information (requires query parameter)
135
- - search_wikipedia: Search Wikipedia articles (requires query parameter)
136
- - extract_youtube_transcript: Get transcript from YouTube videos (requires video_url parameter)
137
- - run_python_code: Execute Python code (requires code parameter)
138
 
139
- Think step-by-step:
140
- 1. Understand the question
141
- 2. Choose appropriate tool(s)
142
- 3. Format tool calls correctly
143
- 4. Process tool responses
144
- 5. Formulate final answer
145
 
146
- Use this format strictly:
147
- FINAL ANSWER: [your concise answer here]
148
-
149
- Rules for your answer:
150
- - If the answer is a number, write only the number (no commas, units, or symbols unless asked)
151
- - If it's a string, avoid articles (a, an, the), don't abbreviate, and use plain text digits
152
- - If a list, follow the rules above for each element and separate with a comma and single space (e.g., "apple, orange, banana")
153
- - If there's an error, start with "Error:" followed by a clear explanation
154
-
155
- Your response must always begin with: FINAL ANSWER:
156
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  )
158
 
159
 
160
  def build_agent_graph(provider: str = "groq"):
 
 
 
161
 
162
- # Define toolset
163
- tools = [
164
- search_web_sources,
165
- search_wikipedia,
166
- extract_youtube_transcript,
167
- run_python_code,
168
- ]
169
-
170
- # Instantiate LLM with proper error handling
171
- groq_api_key = os.getenv("GROQ_API_KEY")
172
- if not groq_api_key:
173
- raise EnvironmentError("GROQ_API_KEY environment variable is not set")
174
-
175
  try:
176
  from pydantic import SecretStr
177
 
 
 
 
 
178
  llm = ChatGroq(
179
- model="qwen-qwq-32b", temperature=0, api_key=SecretStr(groq_api_key)
 
 
180
  )
181
  except Exception as e:
182
- raise Exception(f"Failed to initialize Groq LLM: {str(e)}")
183
-
184
- # Bind tools to the LLM
185
  llm_with_tools = llm.bind_tools(tools)
186
 
187
- # Assistant: reasoning step that plans next action
188
- def assistant_node(state: MessagesState) -> dict:
189
- try:
190
- # Validate input state
191
- if not isinstance(state, dict) or "messages" not in state:
192
- raise ValueError("Invalid state format")
193
-
194
- messages = state["messages"]
195
- if not messages:
196
- raise ValueError("Empty message list")
197
-
198
- # Invoke LLM
199
- response = llm_with_tools.invoke(messages)
200
- if response is None:
201
- raise ValueError("LLM returned None response")
202
-
203
- # Validate response format
204
- if not isinstance(response, (AIMessage, HumanMessage, SystemMessage)):
205
- raise ValueError(f"Invalid response type from LLM: {type(response)}")
206
-
207
- # Validate response content
208
- if not hasattr(response, "content") or response.content is None:
209
- raise ValueError("Response missing content")
210
-
211
- if not isinstance(response.content, str):
212
- raise ValueError(f"Invalid content type: {type(response.content)}")
213
-
214
- # Ensure response has content
215
- if not response.content.strip():
216
- raise ValueError("Empty response content")
217
-
218
- # Add FINAL ANSWER prefix if missing
219
- content = response.content
220
- if "FINAL ANSWER:" not in content:
221
- content = f"FINAL ANSWER: {content}"
222
- response = AIMessage(content=content)
223
-
224
- return {"messages": response}
225
- except Exception as e:
226
- error_msg = f"Error in assistant node: {str(e)}"
227
- print(f"Assistant node error: {error_msg}") # Log error for debugging
228
- return {
229
- "messages": AIMessage(
230
- content="FINAL ANSWER: Error occurred while processing request. Please try again."
231
- )
232
- }
233
-
234
- # Stubbed retriever node for future integration
235
- def retriever_node(state: MessagesState):
236
- """Retriever node"""
237
- # Example: use vector_store.similarity_search() in real use
238
- similar_question = [
239
- AIMessage(content="This is a mock similar document from the retriever.")
240
- ]
241
-
242
- if similar_question:
243
- example_msg = HumanMessage(
244
- content=f"Here I provide a similar question and answer for reference: {similar_question[0].content}",
245
- )
246
- return {"messages": [system_prompt] + state["messages"] + [example_msg]}
247
- else:
248
- return {"messages": [system_prompt] + state["messages"]}
249
 
250
- # Wrap tools with validation
251
- def wrap_tool_with_validation(tool):
252
- original_func = tool.__call__
253
-
254
- def validated_call(*args, **kwargs):
255
- response = original_func(*args, **kwargs)
256
-
257
- try:
258
- if not isinstance(response, dict):
259
- raise ValueError(
260
- f"Tool response must be a dict, got {type(response)}"
261
- )
262
-
263
- # Check for common response keys
264
- for key in ["web_results", "wiki_results", "transcript_results"]:
265
- if key in response:
266
- if not isinstance(response[key], str):
267
- raise ValueError(
268
- f"Tool response[{key}] must be string, got {type(response[key])}"
269
- )
270
- if not response[key].strip():
271
- raise ValueError(f"Tool response[{key}] is empty")
272
-
273
- return response
274
- except Exception as e:
275
- return {"error": f"Tool response validation failed: {str(e)}"}
276
-
277
- tool.__call__ = validated_call
278
- return tool
279
-
280
- # Apply validation wrapper to each tool
281
- validated_tools = [wrap_tool_with_validation(tool) for tool in tools]
282
- tool_node = ToolNode(validated_tools)
283
-
284
- # Define error handling node
285
- def error_handler_node(state: MessagesState) -> dict:
286
- """Handle errors in the graph execution"""
287
- error_msg = state.get("error", "Unknown error occurred")
288
- return {
289
- "messages": AIMessage(content=f"FINAL ANSWER: Error occurred: {error_msg}")
290
- }
291
-
292
- # Define the graph with ReAct loop and error handling
293
  builder = StateGraph(MessagesState)
294
- builder.add_node("assistant", RunnableLambda(assistant_node))
295
- builder.add_node("tools", tool_node)
296
- builder.add_node("retriever", RunnableLambda(retriever_node))
297
- builder.add_node("error_handler", RunnableLambda(error_handler_node))
298
 
299
  builder.set_entry_point("assistant")
300
  builder.add_conditional_edges("assistant", tools_condition)
301
  builder.add_edge("tools", "assistant")
302
  builder.add_edge("assistant", END)
303
 
304
- # Add error handling edges
305
- def route_by_error(state: MessagesState):
306
- """Route to error handler if error is present, otherwise continue normal flow"""
307
- if "error" in state:
308
- return "error_handler"
309
- return None
310
-
311
- builder.add_conditional_edges(
312
- "assistant",
313
- route_by_error,
314
- {
315
- "error_handler": "error_handler",
316
- },
317
- )
318
-
319
- builder.add_conditional_edges(
320
- "tools",
321
- route_by_error,
322
- {
323
- "error_handler": "error_handler",
324
- },
325
- )
326
-
327
- builder.add_edge("error_handler", END)
328
-
329
- graph = builder.compile()
330
-
331
- # Optional: test entrypoint to run the graph manually
332
- test_input = {
333
- "messages": [
334
- system_prompt,
335
- HumanMessage(content="What is the capital of France?"),
336
- ]
337
- }
338
-
339
- # result = graph.invoke(test_input)
340
- # print("\nFinal output:", result["messages"][-1].content)
341
-
342
- return graph
 
 
1
  import os
2
+ from typing import Dict
3
 
4
+ from langchain_community.document_loaders import WikipediaLoader
 
 
5
  from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 
6
  from langchain_core.tools import tool
 
7
  from langchain_groq import ChatGroq
 
 
 
 
 
 
 
8
  from langchain_tavily import TavilySearch
9
  from langgraph.graph import END, START, MessagesState, StateGraph
 
10
  from langgraph.prebuilt import ToolNode, tools_condition
11
 
12
 
13
+ @tool
14
+ def search_web(query: str) -> Dict[str, str]:
15
+ """Search the web using Tavily and return relevant results."""
 
 
 
 
 
 
 
16
  try:
17
+ if not os.getenv("TAVILY_API_KEY"):
18
+ return {
19
+ "error": "Tavily API key not found. Please set TAVILY_API_KEY environment variable."
20
+ }
21
 
22
  search_docs = TavilySearch(max_results=3).invoke({"query": query})
23
  if not search_docs:
24
+ return {"error": "No results found"}
25
+ formatted_docs = "\n\n---\n\n".join(
 
26
  [
27
+ f'Source: {doc.metadata["source"]}\n\n{doc.page_content}'
28
  for doc in search_docs
29
  ]
30
  )
31
+ return {"web_results": formatted_docs}
32
  except Exception as e:
33
+ return {"error": f"Error searching web: {str(e)}"}
34
 
35
 
36
  @tool
37
  def search_wikipedia(query: str) -> Dict[str, str]:
38
  """Search Wikipedia using LangChain's loader and return the first document summary."""
39
  try:
 
 
 
 
 
 
40
  loader = WikipediaLoader(query=query, lang="en", load_max_docs=2)
41
  docs = loader.load()
 
42
  if not docs:
43
+ return {"error": f"No Wikipedia articles found for query: {query}"}
44
+ formatted_docs = "\n\n---\n\n".join(
45
+ [f"Wikipedia Article: {query}\n\n{doc.page_content}" for doc in docs]
 
 
 
 
46
  )
47
  return {"wiki_results": formatted_docs}
48
  except Exception as e:
49
+ return {"error": f"Error searching Wikipedia: {str(e)}"}
 
 
 
50
 
51
 
52
+ # System prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  system_prompt = SystemMessage(
54
+ content="""You are a helpful and precise assistant. When answering questions:
 
55
 
56
+ 1. First, understand what information you need to answer the question
57
+ 2. Then, use the available tools to gather information
58
+ 3. If a tool returns an error or no results, try another tool or rephrase your query
59
+ 4. Analyze all the information and formulate a clear, concise answer
 
 
 
 
 
 
 
60
 
61
+ When using tools, follow this format exactly:
62
+ Action: tool_name
63
+ Action Input: {"parameter": "value"}
 
 
 
64
 
65
+ Available tools:
66
+ - search_wikipedia: Search Wikipedia articles
67
+ Input: {"query": "your search term"}
68
+ Returns: {"wiki_results": "results"} or {"error": "error message"}
69
+ Best for: Historical facts, definitions, general knowledge
70
+ Error handling: If no results found, try rephrasing or use web search
71
+
72
+ - search_web: Search the web for information
73
+ Input: {"query": "your search term"}
74
+ Returns: {"web_results": "results"} or {"error": "error message"}
75
+ Best for: Recent events, current information, diverse sources
76
+ Error handling: If no results found, try more specific search terms
77
+
78
+ Tool usage strategy:
79
+ 1. For historical/factual queries:
80
+ - Start with Wikipedia
81
+ - If no results, try rephrasing the query
82
+ - If still no results, switch to web search
83
+
84
+ 2. For recent events/current info:
85
+ - Start with web search
86
+ - If no results, try more specific terms
87
+ - Cross-reference with Wikipedia if needed
88
+
89
+ 3. For complex queries:
90
+ - Use both tools to gather comprehensive info
91
+ - Compare and verify information
92
+ - Note any discrepancies in your answer
93
+
94
+ 4. When both tools fail:
95
+ - Try different phrasings
96
+ - Break complex queries into simpler parts
97
+ - Be transparent about limitations in your answer
98
+
99
+ Your final answer must:
100
+ 1. Begin with "FINAL ANSWER:"
101
+ 2. Be clear and concise
102
+ 3. Directly answer the question asked
103
+ 4. Include sources if relevant
104
+ 5. Admit uncertainty when information is unclear"""
105
  )
106
 
107
 
108
  def build_agent_graph(provider: str = "groq"):
109
+ """Build the graph"""
110
+ # Initialize tools
111
+ tools = [search_wikipedia, search_web]
112
 
113
+ # Initialize LLM with error handling
 
 
 
 
 
 
 
 
 
 
 
 
114
  try:
115
  from pydantic import SecretStr
116
 
117
+ groq_api_key = os.getenv("GROQ_API_KEY")
118
+ if not groq_api_key:
119
+ raise EnvironmentError("GROQ_API_KEY environment variable is not set")
120
+
121
  llm = ChatGroq(
122
+ model="qwen-qwq-32b",
123
+ temperature=0,
124
+ api_key=SecretStr(groq_api_key),
125
  )
126
  except Exception as e:
127
+ raise Exception(f"Failed to initialize LLM: {str(e)}")
 
 
128
  llm_with_tools = llm.bind_tools(tools)
129
 
130
+ # Create nodes
131
+ def assistant(state: MessagesState):
132
+ """Assistant node"""
133
+ return {"messages": [llm_with_tools.invoke(state["messages"])]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ # Build graph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  builder = StateGraph(MessagesState)
137
+ builder.add_node("assistant", assistant)
138
+ builder.add_node("tools", ToolNode(tools))
 
 
139
 
140
  builder.set_entry_point("assistant")
141
  builder.add_conditional_edges("assistant", tools_condition)
142
  builder.add_edge("tools", "assistant")
143
  builder.add_edge("assistant", END)
144
 
145
+ return builder.compile()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -32,11 +32,16 @@ class BasicAgent:
32
  human_msg = HumanMessage(content=question)
33
  msgs: List[AnyMessage] = [system_prompt, human_msg]
34
 
35
- # Create and cast the state
36
- input_state = cast(MessagesState, {"messages": msgs})
 
 
 
37
 
38
  # Invoke the graph with proper error handling
39
  try:
 
 
40
  result = self.graph.invoke(input_state)
41
  except Exception as e:
42
  print(f"Graph invocation error: {str(e)}")
 
32
  human_msg = HumanMessage(content=question)
33
  msgs: List[AnyMessage] = [system_prompt, human_msg]
34
 
35
+ # Create state dict that matches MessagesState structure
36
+ input_state = {"messages": msgs}
37
+
38
+ # Cast to MessagesState type
39
+ input_state = cast(MessagesState, input_state)
40
 
41
  # Invoke the graph with proper error handling
42
  try:
43
+ if not self.graph:
44
+ raise ValueError("Agent graph not initialized")
45
  result = self.graph.invoke(input_state)
46
  except Exception as e:
47
  print(f"Graph invocation error: {str(e)}")