Sandiago21 commited on
Commit
ea150d1
·
verified ·
1 Parent(s): fd48184

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +651 -87
app.py CHANGED
@@ -1,51 +1,148 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
- import inspect
5
- import pandas as pd
6
- from smolagents import CodeAgent, DuckDuckGoSearchTool, load_tool, tool
7
- from smolagents.models import TransformersModel
8
- import datetime
9
- import requests
10
- import pytz
11
- import yaml
12
- # from tools.final_answer import FinalAnswerTool
13
- from PIL import Image
14
- from io import BytesIO
15
- # from smolagents.tools import BaseTool
16
  import requests
17
  from bs4 import BeautifulSoup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # (Keep Constants as is)
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- # class VisitWebpageTool(BaseTool):
24
- # name = "visit_webpage"
25
- # description = "Fetch and read the content of a webpage"
26
- # inputs = {"url": {"type": "string"}}
27
- # output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # def __call__(self, url: str):
30
- # # response = requests.get(url)
31
- # # soup = BeautifulSoup(response.text, "html.parser")
32
- # # return soup.get_text()[:5000] # truncate for safety
 
 
 
 
 
 
 
 
 
33
 
34
- # headers = {
35
- # "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
36
- # }
37
- # response = requests.get(url, headers=headers, timeout=10)
38
- # response.raise_for_status()
39
- # soup = BeautifulSoup(response.text, "html.parser")
40
 
41
- # # Extract only readable text
42
- # paragraphs = [p.get_text() for p in soup.find_all("p")]
43
- # text = "\n".join(paragraphs)
 
 
 
 
 
44
 
45
- # return text[:5000] # limit size
 
 
 
 
 
 
 
 
46
 
47
 
48
- @tool
49
  def visit_webpage(url: str) -> str:
50
  """
51
  Fetch and read the content of a webpage.
@@ -54,6 +151,7 @@ def visit_webpage(url: str) -> str:
54
  Returns:
55
  Extracted readable text (truncated)
56
  """
 
57
  headers = {
58
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
59
  }
@@ -66,64 +164,525 @@ def visit_webpage(url: str) -> str:
66
  paragraphs = [p.get_text() for p in soup.find_all("p")]
67
  text = "\n".join(paragraphs)
68
 
69
- return text[:5000]
70
 
71
 
72
- # --- Basic Agent Definition ---
73
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
74
- class BasicAgent:
75
- def __init__(self):
76
- self.model = TransformersModel(
77
- model_id="Qwen/Qwen2.5-Coder-7B-Instruct",
78
- max_new_tokens=768,
79
- temperature=0.1,
80
- )
 
 
 
 
81
 
82
- # with open("prompts.yaml", 'r') as stream:
83
- # prompt_templates = yaml.safe_load(stream)
 
 
84
 
85
- # prompt_templates["final_answer"] = {
86
- # "pre_messages": """You have reached the end of the task.
87
-
88
- # Review your reasoning and ensure that your result is correct.
89
- # You must now return the final answer using the `final_answer()` tool.
90
-
91
- # Do not output plain text — only use the tool.""",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # "post_messages": """Write Python code that calls:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- # final_answer(result)
96
 
97
- # Where `result` is the final answer to the task.
 
 
 
98
 
99
- # Do not print anything else.
100
- # Do not return explanations.
101
- # Only call `final_answer` and do a final reason to make sure that you answer the question clearly and directly without additional information if not requested."""
102
- # }
103
-
104
- self.prompt_templates = {'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool.\n\nYou must follow EXACTLY this format:\n\nThought:\n<code>\n# Python code here\n</code>\n\nRules:\n- ALWAYS use <code> and </code>\n- DO NOT use markdown code blocks\n- Use only valid Python\n\nCRITICAL:\n- If the answer requires external information (facts, data, current info), you MUST use a tool.\n- DO NOT guess or hallucinate.\n- DO NOT answer from memory if unsure.\n- Prefer using tools over guessing.\n– If a search result contains a useful link, you MUST use visit_webpage(url) to read it. Do not stop at search results.\n\nYou are NOT allowed to use requests, urllib, or any direct HTTP calls.\n\nAvailable tools:\n- duckduckgo_search(query: str)\n- visit_webpage(url: str)\n\nTo access web content, you MUST use:\n- web_search(query)\n- visit_webpage(url)\n\nAny other method is invalid.\n\nHere are the rules you should always follow to solve your task:\n1. Use only variables that you have defined!\n2. Always use the right arguments for the tools. 
DO NOT pass the arguments as a dict as in \'answer = wiki({\'query\': "What is the place where James Bond lives?"})\', but use the arguments directly as in \'answer = wiki(query="What is the place where James Bond lives?")\'.\n3. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.\n4. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.\n5. Don\'t name any new variable with the same name as a tool: for instance don\'t name a variable \'final_answer\'.\n6. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.\n7. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}\n8. The state persists between code executions: so if in one step you\'ve created variables or imported modules, these will all persist.\n9. Don\'t give up! You\'re in charge of solving the task, not providing directions to solve it.\n\n\nExample:\n\nTask: What is the population of Paris?\n\nThought:\n<code>\nresult = web_search("Paris population")\nprint(result)\n</code>\n\nExample:\n\nTask: Who wrote the novel "1984"?\n\nThought:\n<code>\nresults = web_search("1984 novel author")\nprint(results)\n</code>\n\nThought:\n<code>\npage = visit_webpage(url=results[0])\nprint(page)\n</code>\n\nThought:\n<code>\nfinal_answer("George Orwell")\n</code>\n\nWhen the task is solved, return:\n<code>\nfinal_answer(result)\n</code>\n',
105
- 'planning': {'initial_facts': 'Below I will present you a task.\nYou will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.\nTo do so, you will have to read the task and identify things that must be discovered in order to successfully complete it.\nDon\'t make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey:\n\n---\n### 1. Facts given in the task\nList here the specific facts given in the task that could help you (there might be nothing here).\n\n### 2. Facts to look up\nList here any facts that we may need to look up.\nAlso list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.\n\n### 3. Facts to derive\nList here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.\n\nKeep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:\n### 1. Facts given in the task\n### 2. Facts to look up\n### 3. Facts to derive\nDo not add anything else.',
106
- 'initial_plan': "You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.\nNow for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the '\\n<end_plan>' tag and stop there.\n\nHere is your task:\n\nTask:\n```\n{{task}}\n```\nYou can leverage these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'request', a long string explaining your request.\nGiven that this team member is a real human, you should be very verbose in your request.\nHere is a list of the team members that you can call:\n{%- for agent in managed_agents.values() %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n{%- endif %}\n\nList of facts that you know:\n```\n{{answer_facts}}\n```\n\nNow begin! Write your plan below.",
107
- 'update_facts_pre_messages': 'You are a world expert at gathering known and unknown facts based on a conversation.\nBelow you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these:\n### 1. Facts given in the task\n### 2. Facts that we have learned\n### 3. Facts still to look up\n### 4. Facts still to derive\nFind the task and history below:',
108
- 'update_facts_post_messages': "Earlier we've built a list of facts.\nBut since in your previous steps you may have learned useful new facts or invalidated some false ones.\nPlease update your list of facts based on the previous history, and provide these headings:\n### 1. Facts given in the task\n### 2. Facts that we have learned\n### 3. Facts still to look up\n### 4. Facts still to derive\nNow write your new list of facts below.",
109
- 'update_plan_pre_messages': 'You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.\nYou have been given a task:\n```\n{{task}}\n```\n\nFind below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task.\nIf the previous tries so far have met some success, you can make an updated plan based on these actions.\nIf you are stalled, you can make a completely new plan starting from scratch.',
110
- 'update_plan_post_messages': "You're still working towards solving this task:\n```\n{{task}}\n```\nYou can leverage these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n{%- for agent in managed_agents.values() %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n{%- endif %}\n\nHere is the up to date list of facts that you know:\n```\n{{facts_update}}\n```\n\nNow for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the '\\n<end_plan>' tag and stop there.\n\nNow write your new plan below."},
111
- 'managed_agent': {'task': "You're a helpful agent named '{{name}}'.\nYou have been submitted this task by your manager.\n---\nTask:\n{{task}}\n---\nYou're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.\nYour final_answer WILL HAVE to contain these parts:\n### 1. Task outcome (short version):\n### 2. Task outcome (extremely detailed version):\n### 3. Additional context (if relevant):\n\nPut all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.\nAnd even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.",
112
- 'report': "Here is the final answer from your managed agent '{{name}}':\n{{final_answer}}"},
113
- 'final_answer': {'pre_messages': 'Return the final answer as a SINGLE value.\n\nIf multiple items are required:\n- return a comma-separated string\n- do NOT return a list\n\nExamples:\n- "a,b,c"\n- "42"\n- "3.14"\n',
114
- 'post_messages': 'Write:\n\n<code>\nfinal_answer(result)\n</code>\n\nWhere result is:\n- a string\n- or a number\n- NEVER a list or array\n'}}
115
-
116
- self.web_search = DuckDuckGoSearchTool()
117
- # self.visit_webpage = VisitWebpageTool()
118
-
119
- self.agent = CodeAgent(
120
- model=self.model,
121
- tools=[self.web_search, visit_webpage,],
122
- max_steps=5,
123
- verbosity_level=1,
124
- additional_authorized_imports=["json", "pandas", "wiki", 'random', 'time', 'itertools', 'statistics', 'queue', 'math', 'collections', 'datetime', 'unicodedata', 're', 'stat'],
125
- prompt_templates=self.prompt_templates,
126
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  print("BasicAgent initialized.")
129
  def __call__(self, question: str) -> str:
@@ -134,8 +693,13 @@ class BasicAgent:
134
 
135
  # if question == "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.":
136
  # if " image " not in question and " video " not in question:
137
- if question == "ssWho nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?":
138
- agent_answer = self.agent.run(question)
 
 
 
 
 
139
  else:
140
  agent_answer = fixed_answer
141
  # agent_answer = self.agent.run(question)
 
1
+ import re
2
+ import warnings
3
+ warnings.filterwarnings("ignore")
4
+ import json
5
+ import logging
6
+ from typing import TypedDict, Annotated, Dict, Any
7
+ from json_repair import repair_json
 
 
 
 
 
 
 
 
8
  import requests
9
  from bs4 import BeautifulSoup
10
+ from pydantic import BaseModel, Field
11
+ from typing import Dict
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM
13
+ import torch
14
+ from langgraph.graph import StateGraph, START, END
15
+ from langgraph.graph.message import add_messages
16
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
17
+ from IPython.display import display, Markdown, Image
18
+ from langchain_community.retrievers import BM25Retriever
19
+ from langchain_core.tools import Tool
20
+ from langchain_core.documents import Document
21
+ from langgraph.prebuilt import ToolNode, tools_condition
22
+ from sentence_transformers import SentenceTransformer
23
+ from sklearn.metrics.pairwise import cosine_similarity
24
+ # from langchain.agents import create_tool_calling_agent
25
 
26
  # (Keep Constants as is)
27
  # --- Constants ---
28
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
29
 
30
# Sentence-embedding model used for semantic similarity comparisons
# (cosine_similarity over embeddings; see sklearn import above).
sentence_transformer_model = SentenceTransformer("all-mpnet-base-v2")

# Module-level logger for the agent; INFO level so node traces are visible.
logger = logging.getLogger("agent")
logging.basicConfig(level=logging.INFO)
34
+
35
class Config(object):
    """Hyper-parameters and model identifiers for the agent's LLMs."""

    def __init__(self):
        # All settings gathered in one place, then bound as attributes.
        settings = {
            "random_state": 42,            # seed for reproducibility
            "max_len": 256,                # max new tokens for the main model
            "reasoning_max_len": 128,      # max new tokens for the reasoning model
            "temperature": 0.1,            # low temperature -> near-deterministic output
            # Run on GPU whenever one is available.
            "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
            "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
            # Alternatives previously tried for reasoning:
            #   deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
            #   Qwen/Qwen2.5-7B-Instruct
            "reasoning_model_name": "mistralai/Mistral-7B-Instruct-v0.2",
        }
        for name, value in settings.items():
            setattr(self, name, value)
46
+
47
+
48
# Shared configuration instance used throughout the module.
config = Config()

# Primary generation model + tokenizer (used by `generate`).
tokenizer = AutoTokenizer.from_pretrained(config.model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Reasoning model + tokenizer (used by `reasoning_generate`).
# NOTE(review): reasoning_model_name currently equals model_name, so this
# loads a second copy of the same weights — consider reusing `model` to
# halve memory use; confirm before changing.
reasoning_tokenizer = AutoTokenizer.from_pretrained(config.reasoning_model_name)
reasoning_model = AutoModelForCausalLM.from_pretrained(
    config.reasoning_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
63
+
64
def generate(prompt):
    """
    Generate a text completion from the primary causal language model.

    Parameters
    ----------
    prompt : str
        Input text prompt used to condition the language model.

    Returns
    -------
    str
        The generated continuation text, decoded with special tokens
        removed and leading/trailing whitespace stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[-1]

    # Inference only — no gradients needed.
    with torch.no_grad():
        token_ids = model.generate(
            **encoded,
            max_new_tokens=config.max_len,
            temperature=config.temperature,
        )

    # Slice off the prompt tokens so only the continuation is decoded.
    completion_ids = token_ids[0][prompt_len:]
    text = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return text.strip()
92
+
93
def reasoning_generate(prompt):
    """
    Generate a text completion from the reasoning causal language model.

    Parameters
    ----------
    prompt : str
        Input text prompt used to condition the language model.

    Returns
    -------
    str
        The generated continuation text, decoded into a string with special
        tokens removed and leading/trailing whitespace stripped.
    """
    # BUG FIX: move the encoded inputs to the *reasoning* model's device.
    # Previously this used `model.device`, which crashes with a device
    # mismatch whenever the two models are placed on different devices.
    inputs = reasoning_tokenizer(prompt, return_tensors="pt").to(reasoning_model.device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = reasoning_model.generate(
            **inputs,
            max_new_tokens=config.reasoning_max_len,
            temperature=config.temperature,
        )

    # Slice off the prompt tokens so only the continuation is decoded.
    generated = outputs[0][inputs["input_ids"].shape[-1]:]

    return reasoning_tokenizer.decode(generated, skip_special_tokens=True).strip()
121
 
122
class Action(BaseModel):
    """A single tool call proposed by the planner: which tool and its arguments."""

    tool: str = Field(...)  # required tool name, e.g. "web_search" — should be a key of ALLOWED_TOOLS
    args: Dict  # keyword arguments to pass to the tool
 
 
 
125
 
126
+ # Generate the AgentState and Agent graph
127
+ class AgentState(TypedDict):
128
+ messages: Annotated[list[AnyMessage], add_messages]
129
+ proposed_action: str
130
+ information: str
131
+ output: str
132
+ confidence: float
133
+ judge_explanation: str
134
 
135
# Registry of every tool: tool name -> list of accepted argument names.
ALL_TOOLS = {
    "web_search": ["query"],
    "visit_webpage": ["url"],
}

# Whitelist used for authorization; currently identical to ALL_TOOLS.
ALLOWED_TOOLS = {
    "web_search": ["query"],
    "visit_webpage": ["url"],
}
144
 
145
 
 
146
  def visit_webpage(url: str) -> str:
147
  """
148
  Fetch and read the content of a webpage.
 
151
  Returns:
152
  Extracted readable text (truncated)
153
  """
154
+
155
  headers = {
156
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
157
  }
 
164
  paragraphs = [p.get_text() for p in soup.find_all("p")]
165
  text = "\n".join(paragraphs)
166
 
167
+ return (text[:500], text[500:1000])
168
 
169
 
170
def web_search(query: str, num_results: int = 10):
    """
    Search the internet for the query provided.

    Args:
        query: Query to search in the internet.
        num_results: Maximum number of result links to return (default 10).

    Returns:
        list of urls (may contain fewer than ``num_results`` entries).

    Raises:
        requests.HTTPError: if DuckDuckGo returns a non-2xx response.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = "https://html.duckduckgo.com/html/"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    # timeout prevents the agent from hanging indefinitely on a slow endpoint;
    # raise_for_status surfaces HTTP errors instead of silently parsing an
    # error page into an empty result list.
    response = requests.post(url, data={"q": query}, headers=headers, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): DuckDuckGo result anchors are often redirect links
    # ("//duckduckgo.com/l/?uddg=..."); confirm downstream visit_webpage
    # callers handle scheme-relative/redirect URLs.
    return [a.get("href") for a in soup.select(".result__a")[:num_results]]
188
 
189
def planner_node(state: AgentState):
    """
    Planning node for a tool-using LLM agent.

    Reads the latest user message from ``state["messages"]``, prompts the
    language model to pick exactly one tool call as strict JSON, repairs and
    parses the model output, and stores the result in
    ``state["proposed_action"]`` in the form::

        {"tool": "<tool_name>", "args": {...}}

    Parameters
    ----------
    state : AgentState
        Agent state dictionary; only "messages" is read here.

    Returns
    -------
    AgentState
        The same state with "proposed_action" set to the parsed tool call.
        (No other keys are written by this node.)

    Security Notes
    --------------
    - This node does not enforce tool-level authorization and does not
      validate hallucinated tools or their arguments; downstream nodes must
      implement tool whitelist validation, argument validation, and
      execution authorization.
    - NOTE(review): the prompt hard-codes one concrete example user request
      (the Wikipedia dinosaur question) — confirm this does not bias the
      planner's queries on unrelated tasks.
    """

    # Latest user message is the planning target.
    user_input = state["messages"][-1].content

    prompt = f"""
    You are a planning agent.

    You MUST return ONLY valid JSON as per the tools specs below ONLY.
    No extra text.
    DO NOT invent anything additional beyond the user request provided. Keep it strict to the user request information provided. The question and the query should be fully relevant to the user request provided, no deviation and hallucination. If possible and makes sense then the query should be exactly the user request.

    The available tools and their respective arguments are: {{
        "web_search": ["query"],
        "visit_webpage": ["url"],
    }}

    Return exactly the following format:
    Response:
    {{
        "tool": "...",
        "args": {{...}}
    }}

    User request: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?. Example of valid JSON expected:
    Response:
    {{"tool": "web_search",
    "args": {{"query": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
    }}
    }}

    Return only one Response!

    User request:
    {user_input}
    """

    output = generate(prompt)

    # Keep only the text after the last "Response:" marker, repair any
    # malformed JSON with repair_json, then parse it into a dict.
    state["proposed_action"] = output.split("Response:")[-1]
    fixed = repair_json(state["proposed_action"])
    data = json.loads(fixed)
    state["proposed_action"] = data

    return state
288
+
289
def planner_node(state: AgentState):
    """
    Planning node for a tool-using LLM agent.

    NOTE(review): this is a byte-identical duplicate of the earlier
    ``planner_node`` definition in this file; Python keeps only this second
    binding, silently shadowing the first. One of the two copies should be
    deleted.

    Reads the latest user message from ``state["messages"]``, prompts the
    language model to pick exactly one tool call as strict JSON, repairs and
    parses the model output, and stores the result in
    ``state["proposed_action"]`` in the form::

        {"tool": "<tool_name>", "args": {...}}

    Parameters
    ----------
    state : AgentState
        Agent state dictionary; only "messages" is read here.

    Returns
    -------
    AgentState
        The same state with "proposed_action" set to the parsed tool call.
        (No other keys are written by this node.)

    Security Notes
    --------------
    - This node does not enforce tool-level authorization and does not
      validate hallucinated tools or their arguments; downstream nodes must
      implement tool whitelist validation, argument validation, and
      execution authorization.
    """

    # Latest user message is the planning target.
    user_input = state["messages"][-1].content

    prompt = f"""
    You are a planning agent.

    You MUST return ONLY valid JSON as per the tools specs below ONLY.
    No extra text.
    DO NOT invent anything additional beyond the user request provided. Keep it strict to the user request information provided. The question and the query should be fully relevant to the user request provided, no deviation and hallucination. If possible and makes sense then the query should be exactly the user request.

    The available tools and their respective arguments are: {{
        "web_search": ["query"],
        "visit_webpage": ["url"],
    }}

    Return exactly the following format:
    Response:
    {{
        "tool": "...",
        "args": {{...}}
    }}

    User request: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?. Example of valid JSON expected:
    Response:
    {{"tool": "web_search",
    "args": {{"query": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
    }}
    }}

    Return only one Response!

    User request:
    {user_input}
    """

    output = generate(prompt)

    # Keep only the text after the last "Response:" marker, repair any
    # malformed JSON with repair_json, then parse it into a dict.
    state["proposed_action"] = output.split("Response:")[-1]
    fixed = repair_json(state["proposed_action"])
    data = json.loads(fixed)
    state["proposed_action"] = data

    return state
388
+
389
def safety_node(state: AgentState):
    """
    Evaluate the information provided and output the response for the user request.

    Prompts the reasoning model to emit a single ``Response: <answer>`` line
    from the user's request plus the gathered ``state["information"]``, then
    normalizes the answer (strips one pair of surrounding double quotes and a
    single trailing period) and stores it in ``state["output"]``.
    """

    user_input = state["messages"][-1].content
    information = state["information"]

    prompt = f"""
    You are a response agent.

    You must reason over the user request and the provided information and output the answer to the user's request.

    You MUST return EXACTLY one line in the following format:
    Response: <answer>

    DO NOT invent anything additional and return only what is asked and in the format asked.

    Only return a response if you are confident about the answer, otherwise return empty string.

    Example of valid json response for user request: Who was the winner of 2025 World Snooker Championship:
    Response: Zhao Xintong.

    Return exactly the above requested format and nothing more!
    DO NOT generate any additional text after it!

    User request:
    {user_input}

    Information:
    {information}
    """

    raw_output = reasoning_generate(prompt)
    # raw_output = generate(prompt)

    logger.info(f"Raw Output: {raw_output}")

    # Keep only the text after the last "Response:" marker.
    output = raw_output.split("Response:")[-1].strip()
    # match = re.search(r"Response:\s*(.*)", raw_output, re.IGNORECASE)
    # output = match.group(1).strip() if match else ""

    # Strip one pair of surrounding double quotes, if present.
    if len(output) > 2 and output[0] == '"' and output[-1] == '"':
        output = output[1:-1]

    # Drop a single trailing period to normalize the final answer.
    # NOTE(review): this also truncates answers that legitimately end in "."
    # (e.g. abbreviations) — confirm this is the intended scoring format.
    if len(output) > 2 and output[-1] == '.':
        output = output[:-1]

    state["output"] = output

    logger.info(f"State (Safety Agent): {state}")

    return state
442
+
443
+
444
def Judge(state: AgentState):
    """
    Judge node: score how well the generated answer is grounded in the
    retrieved information.

    Reads from ``state``:
        - "output": the candidate answer produced upstream.
        - "information": the retrieved web/context text.
        - "messages": the last message's ``content`` is the user request.

    Writes to ``state``:
        - "confidence" (float): grounding confidence, 0.0–1.0.
        - "judge_explanation" (str): the model's justification.

    Returns
    -------
    AgentState
        The updated state.
    """
    answer = state["output"]
    information = state["information"]
    user_input = state["messages"][-1].content

    # Few-shot prompt. The example "Response" objects are valid JSON
    # (explanation values are quoted) so the model is less likely to emit
    # output that needs repair before parsing.
    prompt = f"""
    You are a Judging agent.

    You must reason over the user request and judge with a confidence score whether the answer is indeed based on the provided information or not.

    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information: Zhao Xintong won the 2025 World Snooker Championship with a dominant 18-12 final victory over Mark Williams in Sheffield on Monday. The 28 year-old becomes the first player from China to win snooker’s premier prize at the Crucible Theatre.
    Zhao, who collects a top prize worth £500,000, additionally becomes the first player under amateur status to go all the way to victory in a World Snooker Championship.
    The former UK champion entered the competition in the very first qualifying round at the English Institute of Sport last month.
    He compiled a dozen century breaks as he fought his way through four preliminary rounds in fantastic fashion to qualify for the Crucible for the third time in his career.
    In the final round of the qualifiers known as Judgement Day, Zhao edged Elliot Slessor 10-8 in a high-quality affair during which both players made a hat-trick of tons.
    Ironically, that probably represented his sternest test throughout the entire event.
    Answer: "Zhao Xintong"

    Response: {{
        "confidence": 1.0,
        "explanation": "Based on the information provided, it is indeed mentioned that Zhao Xintong, which is the answer provided, won the 2025 World Snooker Championship."
    }}


    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information: Zhao Xintong won the 2025 World Snooker Championship with a dominant 18-12 final victory over Mark Williams in Sheffield on Monday. The 28 year-old becomes the first player from China to win snooker’s premier prize at the Crucible Theatre.
    Zhao, who collects a top prize worth £500,000, additionally becomes the first player under amateur status to go all the way to victory in a World Snooker Championship.
    The former UK champion entered the competition in the very first qualifying round at the English Institute of Sport last month.
    He compiled a dozen century breaks as he fought his way through four preliminary rounds in fantastic fashion to qualify for the Crucible for the third time in his career.
    In the final round of the qualifiers known as Judgement Day, Zhao edged Elliot Slessor 10-8 in a high-quality affair during which both players made a hat-trick of tons.
    Ironically, that probably represented his sternest test throughout the entire event.
    Answer: "Ronnie O'sullivan"

    Response: {{
        "confidence": 0.0,
        "explanation": "Based on the information provided, it was Zhao Xintong and not Ronnie O'sullivan who won the 2025 World Snooker Championship."
    }}

    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information:
    Answer: "Ronnie O'sullivan"

    Response: {{
        "confidence": 0.0,
        "explanation": "There is no information provided, so cannot answer who won the 2025 World Snooker Championship."
    }}

    Return exactly the above requested format and nothing more!
    DO NOT generate any additional text after it!
    Return only what is asked and in the format asked!

    User request:
    {user_input}

    Information:
    {information}

    Answer:
    {answer}
    """

    raw_output = generate(prompt)

    # Keep all diagnostics on the module logger (was a stray print()).
    logger.info(f"Judge raw output: {raw_output}")

    output = raw_output.split("Response:")[-1].strip()

    try:
        fixed = repair_json(output)
        data = json.loads(fixed)
        state["confidence"] = float(data["confidence"])
        state["judge_explanation"] = data["explanation"]
    except (ValueError, TypeError, KeyError) as e:
        # Fail closed: if the judge output cannot be parsed we cannot trust
        # the answer, so report zero confidence instead of crashing the graph.
        logger.info(f"Judge output could not be parsed: {e}")
        state["confidence"] = 0.0
        state["judge_explanation"] = "Judge output could not be parsed."

    logger.info(f"State (Judge Agent): {state}")

    return state
524
+
525
+
526
def route(state: AgentState):
    """Route after the safety node: 'block' when risk exceeds 0.5, else 'allow'."""
    return "block" if state["risk_score"] > 0.5 else "allow"
532
+
533
def tool_executor(state: AgentState):
    """
    Execute the tool call proposed by the planner and store gathered web
    information on the agent state.

    Supported tools (dispatched on ``state["proposed_action"]``, validated
    through the ``Action`` schema):
        - "web_search": run a web search (overriding the planner's query with
          the raw user request when their embedding similarity is <= 0.80),
          visit each result page, and keep the text chunks whose cosine
          similarity to the user query exceeds 0.60.
        - "visit_webpage": fetch a single page.

    Writes to ``state``:
        - "information" (str): concatenation of relevant page chunks
          (empty for the "visit_webpage" branch).
        - "best_query_webpage_information_similarity_score" (float): best
          per-chunk similarity seen, -1.0 when nothing was scored.

    Returns
    -------
    AgentState
        The updated state.

    NOTE(review): in the "visit_webpage" branch the fetched text is assigned
    to a local but never copied into state["information"] — confirm whether
    that is intended.
    NOTE(review): visit_webpage's signature is annotated to return str, yet
    the web_search branch iterates its result as a sequence of text chunks —
    confirm the actual return type.
    """

    web_page_result = ""
    action = Action.model_validate(state["proposed_action"])

    best_score = -1.0
    webpage_information_complete = ""

    if action.tool == "web_search":
        logger.info(f"action.tool: {action.tool}")

        user_query = state["messages"][-1].content
        # Hoisted: the user-query embedding is invariant across all pages
        # and chunks scored below.
        query_embeddings = sentence_transformer_model.encode_query(user_query).reshape(1, -1)

        # Pulled into a local first — nesting state["..."] inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        proposed_query = state["proposed_action"]["args"]["query"]
        query_arg_embeddings = sentence_transformer_model.encode_query(proposed_query).reshape(1, -1)
        score = float(cosine_similarity(query_embeddings, query_arg_embeddings)[0][0])

        if score > 0.80:
            results = web_search(**action.args)
        else:
            # The planner's query drifted from the user's intent; fall back
            # to searching with the raw user request.
            logger.info(f"Overwriting user query because the Agent suggested query had score: {proposed_query} - {score}")
            results = web_search(**{"query": user_query})

        logger.info(f"Webpages - Results: {results}")

        for result in results:
            try:
                web_page_results = visit_webpage(result)

                for web_page_result in web_page_results:
                    chunk_embeddings = sentence_transformer_model.encode_query(web_page_result).reshape(1, -1)
                    chunk_score = float(cosine_similarity(query_embeddings, chunk_embeddings)[0][0])

                    # Keep only chunks that are sufficiently on-topic.
                    if chunk_score > 0.60:
                        webpage_information_complete += web_page_result
                        webpage_information_complete += " \n "
                        webpage_information_complete += " \n "

                    if chunk_score > best_score:
                        best_score = chunk_score

            except Exception as e:
                # Best-effort scraping: one bad page must not abort the search.
                logger.info(f"Tool Executor - Exception: {e}")

    elif action.tool == "visit_webpage":
        try:
            web_page_result = visit_webpage(**action.args)
        except Exception as e:
            # Preserve the original best-effort behaviour, but record the failure
            # instead of swallowing it with a bare except.
            logger.info(f"Tool Executor - visit_webpage failed: {e}")
    else:
        logger.info(f"Unknown tool requested: {action.tool}")

    state["information"] = webpage_information_complete
    state["best_query_webpage_information_similarity_score"] = best_score

    logger.info(f"Information: {state['information']}")
    logger.info(f"Information: {state['best_query_webpage_information_similarity_score']}")

    return state
652
+
653
# Build the agent graph. The compiled pipeline is linear:
#   START -> planner -> tool_executor -> safety
# NOTE(review): no edge from "safety" to END is added here, and the
# conditional routing via route() is commented out — confirm the graph
# terminates as intended before relying on it.
safe_workflow = StateGraph(AgentState)
# safe_workflow = StateGraph(dict)

# Register the three active nodes; the judge node is currently disabled.
safe_workflow.add_node("planner", planner_node)
safe_workflow.add_node("tool_executor", tool_executor)
safe_workflow.add_node("safety", safety_node)
# safe_workflow.add_node("judge", Judge)

# safe_workflow.set_entry_point("planner")

# Wire the linear flow; add_edge(START, ...) replaces set_entry_point above.
safe_workflow.add_edge(START, "planner")
safe_workflow.add_edge("planner", "tool_executor")
safe_workflow.add_edge("tool_executor", "safety")
# safe_workflow.add_edge("safety", "judge")
# safe_workflow.add_conditional_edges(
#     "safety",
#     route,
#     {
#         "allow": "tool_executor",
#         "block": END,
#     },
# )
# safe_workflow.add_edge("tool_executor", END)

# Compilation is deferred to BasicAgent.__init__ rather than done at import time.
# safe_app = safe_workflow.compile()
678
+
679
+
680
+
681
+ # --- Basic Agent Definition ---
682
+ # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
683
+ class BasicAgent:
684
+ def __init__(self):
685
+ self.safe_app = safe_workflow.compile()
686
 
687
  print("BasicAgent initialized.")
688
  def __call__(self, question: str) -> str:
 
693
 
694
  # if question == "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.":
695
  # if " image " not in question and " video " not in question:
696
+ if question == "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?":
697
+ state = {
698
+ "messages": question,
699
+ }
700
+ response = self.safe_app.invoke(state)
701
+
702
+ agent_answer = response["output"]
703
  else:
704
  agent_answer = fixed_answer
705
  # agent_answer = self.agent.run(question)