Spaces:

mabelwang21
/

Agents_Final_Assignment

Sleeping

App Files Files Community

mabelwang21 committed on May 26, 2025

Commit

2c6f69a

1 Parent(s): 22764df

Fix RAG routing; update calculate function

Browse files

Files changed (1) hide show

agent.py +179 -19

agent.py CHANGED Viewed

@@ -41,35 +41,44 @@ load_dotenv()
 # Initialize Tavily client (after loading environment variables)
 tavily_client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
 # === System Prompt ===
 SYSTEM_PROMPT = """
 You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
 FINAL ANSWER: [YOUR FINAL ANSWER].
 YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number nor use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 """.strip()
 @tool
 def calculate(expr: str) -> str:
-    """Evaluate a math expression. Supports basic operations (+,-,*,/,**) and functions (sin,cos,sqrt,etc)."""
     try:
         import math
-        # Create safe math namespace
         safe_dict = {
-            k: v for k, v in math.__dict__.items()
-            if not k.startswith('_')
-        }
-        safe_dict.update({
             'abs': abs,
             'round': round,
             'max': max,
             'min': min
-        })
-        # Evaluate expression in safe environment
         result = eval(expr, {"__builtins__": {}}, safe_dict)
-        return str(float(result))
     except Exception as e:
         return f"Error calculating expression: {e}"
@@ -123,6 +132,155 @@ def tavily_search(query: str) -> str:
         return str(results)
     except Exception as e:
         return f"Error performing Tavily search: {e}"
 @tool
 def image_recognition(image_path: str) -> str:
@@ -306,7 +464,8 @@ def summarize(text: str, llm=None) -> str:
 # Update tools list
 tools: List[StructuredTool] = [
-    calculate, tavily_search, wikipedia_search, image_recognition,
     read_pdf, read_csv, read_spreadsheet, transcribe_audio,
     youtube_transcript_tool, youtube_transcript_api, read_jsonl,
     python_interpreter, download_file, extract_table,
@@ -458,12 +617,17 @@ class MyAgent:
             builder.add_edge(START, "assistant")
             # Graph flow: force rag_search if files loaded and not yet used, then use tools_condition
             def route(state):
-                # If files loaded and rag not used, force rag_search
-                if state.get("input_file") and not state.get("rag_used", False):
                     return "tools"
-                last_msg = state["messages"][-1] if state.get("messages") else None
-                # Only route to tools if the last message is an AIMessage and has tool_calls
                 if last_msg and isinstance(last_msg, AIMessage):
                     if getattr(last_msg, "tool_calls", None):
                         return "tools"
@@ -516,7 +680,3 @@ class MyAgent:
                 print(f"Message types: {[type(m).__name__ for m in state['messages']]}")
             return state

 # Initialize Tavily client (after loading environment variables)
 tavily_client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
+print(tavily_client)
 # === System Prompt ===
 SYSTEM_PROMPT = """
 You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
 FINAL ANSWER: [YOUR FINAL ANSWER].
 YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number nor use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 """.strip()
 @tool
 def calculate(expr: str) -> str:
+    """Evaluate a math expression. Supports operations, numpy and math functions."""
     try:
         import math
+        import numpy as np
+        # Comprehensive math namespace
         safe_dict = {
+            **{k: v for k, v in math.__dict__.items() if not k.startswith('_')},
+            'np': np,
+            'array': np.array,
+            'mean': np.mean,
+            'median': np.median,
+            'std': np.std,
+            'sum': np.sum,
             'abs': abs,
             'round': round,
             'max': max,
             'min': min
+        }
         result = eval(expr, {"__builtins__": {}}, safe_dict)
+        # Format result appropriately
+        if isinstance(result, (np.ndarray, list)):
+            return str(result)
+        if isinstance(result, (int, float)):
+            return str(float(result))
+        return str(result)
     except Exception as e:
         return f"Error calculating expression: {e}"
         return str(results)
     except Exception as e:
         return f"Error performing Tavily search: {e}"
@tool
def advanced_search(query: str, max_results: int = 5) -> str:
    """Advanced web search with multiple strategies and better result parsing."""
    # The docstring above is kept verbatim: assuming `tool` is LangChain's
    # decorator, it doubles as the tool description shown to the model.
    #
    # Runs one "advanced"-depth Tavily search and returns a text report: an
    # optional "DIRECT ANSWER" line followed by numbered RESULT entries.
    # Returns "No results found" when the response yields nothing, and a
    # "Search error: ..." string if the client call or formatting fails.

    # URLs containing any of these get a longer content excerpt.
    academic_domains = ("arxiv.org", "usgs.gov", "nih.gov")
    try:
        response = tavily_client.search(
            query,
            search_depth="advanced",
            max_results=max_results,
            include_answer=True,
            include_raw_content=True,
            # NOTE(review): this restricts every search to four academic /
            # government domains, which is narrower than the docstring
            # suggests - confirm that is intended.
            include_domains=["arxiv.org", "usgs.gov", "nih.gov", "pubmed.ncbi.nlm.nih.gov"],
        )

        sections = []
        if isinstance(response, dict):
            # Include direct answer if available
            if response.get("answer"):
                sections.append(f"DIRECT ANSWER: {response['answer']}")

            for i, hit in enumerate(response.get("results") or [], 1):
                # `or ""` guards against keys that are present but None, which
                # would otherwise break the slice / membership test below.
                title = hit.get("title") or ""
                content = hit.get("content") or ""
                url = hit.get("url") or ""
                limit = 1000 if any(domain in url for domain in academic_domains) else 500
                sections.append(
                    f"RESULT {i}:\nTitle: {title}\nURL: {url}\nContent: {content[:limit]}\n"
                )

        # An empty tool output gives the caller nothing to act on; say so explicitly.
        return "\n".join(sections) if sections else "No results found"
    except Exception as e:
        return f"Search error: {e}"
@tool
def arxiv_search(query: str, date_filter: str = "") -> str:
    """Specialized search for arXiv papers with date filtering."""
    # Prefixes the query with "site:arxiv.org", appends `date_filter` when it
    # is non-empty, and keeps only hits whose URL contains "arxiv.org".
    # Returns the formatted hits, "No arXiv papers found" when none survive
    # the URL filter, "No results found" when the response has no results,
    # or an "ArXiv search error: ..." string on any exception.
    try:
        search_terms = f"site:arxiv.org {query}" + (f" {date_filter}" if date_filter else "")
        response = tavily_client.search(
            search_terms,
            search_depth="advanced",
            max_results=8,
            include_raw_content=True
        )

        if not (isinstance(response, dict) and response.get("results")):
            return "No results found"

        papers = []
        for hit in response["results"]:
            if "arxiv.org" not in hit.get("url", ""):
                continue
            title = hit.get("title", "")
            content = hit.get("content", "")
            url = hit.get("url", "")
            papers.append(f"ArXiv Paper:\nTitle: {title}\nURL: {url}\nContent: {content[:800]}\n")

        if papers:
            return "\n".join(papers)
        return "No arXiv papers found"
    except Exception as e:
        return f"ArXiv search error: {e}"
@tool
def targeted_search(base_query: str, additional_terms: List[str]) -> str:
    """Perform multiple targeted searches with different term combinations."""
    # Issues one Tavily search (max 3 results) per entry of `additional_terms`,
    # each time appending that entry to `base_query`. Output is one
    # "=== Search: ... ===" header per query that returned results, followed
    # by Title / URL / Content lines (content cut to 400 characters).
    # Any exception yields a "Targeted search error: ..." string.
    try:
        report_lines = []
        for extra in additional_terms:
            query = f"{base_query} {extra}"
            response = tavily_client.search(query, max_results=3)
            if not (isinstance(response, dict) and response.get("results")):
                continue

            report_lines.append(f"=== Search: {query} ===")
            for hit in response["results"]:
                report_lines.extend((
                    f"Title: {hit.get('title', '')}",
                    f"URL: {hit.get('url', '')}",
                    f"Content: {hit.get('content', '')[:400]}\n",
                ))
        return "\n".join(report_lines)
    except Exception as e:
        return f"Targeted search error: {e}"
@tool
def extract_zip_codes(text: str) -> str:
    """Extract 5-digit zip codes from text."""
    # Finds every standalone run of exactly five digits (word-boundary
    # delimited), de-duplicates them and reports them in sorted order.
    # Returns a "Found zip codes: ..." line, a "No 5-digit zip codes found in
    # text" message, or a "Zip code extraction error: ..." string on failure.
    try:
        found = sorted(set(re.findall(r'\b\d{5}\b', text)))
        if not found:
            return "No 5-digit zip codes found in text"
        return f"Found zip codes: {', '.join(found)}"
    except Exception as e:
        return f"Zip code extraction error: {e}"
@tool
def academic_citation_search(paper_info: str) -> str:
    """Search for academic papers that cite or are cited by the given paper."""
    # The docstring above is kept verbatim: assuming `tool` is LangChain's
    # decorator, it doubles as the tool description shown to the model.
    #
    # Runs up to four citation-oriented Tavily queries built from
    # `paper_info` and returns at most `max_hits` formatted entries.
    # Returns "No citation results found" when nothing comes back, and a
    # "Citation search error: ..." string on any exception.
    max_hits = 5  # only this many entries are ever included in the output
    try:
        citation_queries = [
            f'"{paper_info}" citations references',
            f'{paper_info} "cited by"',
            f'{paper_info} bibliography references',
            f'site:scholar.google.com {paper_info}'
        ]

        hits = []
        seen_urls = set()
        for query in citation_queries:
            # Once enough unique hits are collected, further queries cannot
            # change the output, so skip the extra API calls.
            if len(hits) >= max_hits:
                break
            search_result = tavily_client.search(query, max_results=3)
            if not (isinstance(search_result, dict) and search_result.get("results")):
                continue
            for hit in search_result["results"]:
                url = hit.get("url") or ""
                # The same page often matches several query phrasings; keep
                # only its first occurrence so duplicates don't crowd the list.
                if url and url in seen_urls:
                    continue
                seen_urls.add(url)
                hits.append(hit)

        if not hits:
            return "No citation results found"

        return "\n".join(
            f"Citation Source: {hit.get('title') or ''}\n"
            f"URL: {hit.get('url') or ''}\n"
            f"Content: {(hit.get('content') or '')[:500]}\n"
            for hit in hits[:max_hits]
        )
    except Exception as e:
        return f"Citation search error: {e}"
 @tool
 def image_recognition(image_path: str) -> str:
 # Update tools list
 tools: List[StructuredTool] = [
+    calculate, tavily_search, advanced_search, arxiv_search, targeted_search,
+    academic_citation_search, extract_zip_codes, wikipedia_search, image_recognition,
     read_pdf, read_csv, read_spreadsheet, transcribe_audio,
     youtube_transcript_tool, youtube_transcript_api, read_jsonl,
     python_interpreter, download_file, extract_table,
             builder.add_edge(START, "assistant")
             # Graph flow: force rag_search if files loaded and not yet used, then use tools_condition
             def route(state):
+                last_msg = state["messages"][-1] if state.get("messages") else None
+                # Check if this is a math question that doesn't need RAG
+                is_math_question = re.search(r'(calculate|compute|what is|solve|find the value|evaluate)',
+                                          state["messages"][-2].content.lower()) if len(state["messages"]) > 1 else False
+                # Only force RAG if we have files AND it's not a pure math question AND RAG hasn't been used
+                if (state.get("input_file") and not state.get("rag_used", False) and not is_math_question):
                     return "tools"
+                # Regular tool routing logic
                 if last_msg and isinstance(last_msg, AIMessage):
                     if getattr(last_msg, "tool_calls", None):
                         return "tools"
                 print(f"Message types: {[type(m).__name__ for m in state['messages']]}")
             return state