Sandiago21 commited on
Commit
ea150d1
·
verified ·
1 Parent(s): fd48184

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +651 -87
app.py CHANGED
@@ -1,51 +1,148 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
- import inspect
5
- import pandas as pd
6
- from smolagents import CodeAgent, DuckDuckGoSearchTool, load_tool, tool
7
- from smolagents.models import TransformersModel
8
- import datetime
9
- import requests
10
- import pytz
11
- import yaml
12
- # from tools.final_answer import FinalAnswerTool
13
- from PIL import Image
14
- from io import BytesIO
15
- # from smolagents.tools import BaseTool
16
  import requests
17
  from bs4 import BeautifulSoup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # (Keep Constants as is)
20
  # --- Constants ---
21
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
22
 
23
- # class VisitWebpageTool(BaseTool):
24
- # name = "visit_webpage"
25
- # description = "Fetch and read the content of a webpage"
26
- # inputs = {"url": {"type": "string"}}
27
- # output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # def __call__(self, url: str):
30
- # # response = requests.get(url)
31
- # # soup = BeautifulSoup(response.text, "html.parser")
32
- # # return soup.get_text()[:5000] # truncate for safety
 
 
 
 
 
 
 
 
 
33
 
34
- # headers = {
35
- # "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
36
- # }
37
- # response = requests.get(url, headers=headers, timeout=10)
38
- # response.raise_for_status()
39
- # soup = BeautifulSoup(response.text, "html.parser")
40
 
41
- # # Extract only readable text
42
- # paragraphs = [p.get_text() for p in soup.find_all("p")]
43
- # text = "\n".join(paragraphs)
 
 
 
 
 
44
 
45
- # return text[:5000] # limit size
 
 
 
 
 
 
 
 
46
 
47
 
48
- @tool
49
  def visit_webpage(url: str) -> str:
50
  """
51
  Fetch and read the content of a webpage.
@@ -54,6 +151,7 @@ def visit_webpage(url: str) -> str:
54
  Returns:
55
  Extracted readable text (truncated)
56
  """
 
57
  headers = {
58
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
59
  }
@@ -66,64 +164,525 @@ def visit_webpage(url: str) -> str:
66
  paragraphs = [p.get_text() for p in soup.find_all("p")]
67
  text = "\n".join(paragraphs)
68
 
69
- return text[:5000]
70
 
71
 
72
- # --- Basic Agent Definition ---
73
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
74
- class BasicAgent:
75
- def __init__(self):
76
- self.model = TransformersModel(
77
- model_id="Qwen/Qwen2.5-Coder-7B-Instruct",
78
- max_new_tokens=768,
79
- temperature=0.1,
80
- )
 
 
 
 
81
 
82
- # with open("prompts.yaml", 'r') as stream:
83
- # prompt_templates = yaml.safe_load(stream)
 
 
84
 
85
- # prompt_templates["final_answer"] = {
86
- # "pre_messages": """You have reached the end of the task.
87
-
88
- # Review your reasoning and ensure that your result is correct.
89
- # You must now return the final answer using the `final_answer()` tool.
90
-
91
- # Do not output plain text — only use the tool.""",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # "post_messages": """Write Python code that calls:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- # final_answer(result)
96
 
97
- # Where `result` is the final answer to the task.
 
 
 
98
 
99
- # Do not print anything else.
100
- # Do not return explanations.
101
- # Only call `final_answer` and do a final reason to make sure that you answer the question clearly and directly without additional information if not requested."""
102
- # }
103
-
104
- self.prompt_templates = {'system_prompt': 'You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.\nTo do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.\nTo solve the task, you must plan forward to proceed in a series of steps, in a cycle of \'Thought:\', \'Code:\', and \'Observation:\' sequences.\nAt each step, in the \'Thought:\' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.\nThen in the \'Code:\' sequence, you should write the code in simple Python. The code sequence must end with \'<end_code>\' sequence.\nDuring each intermediate step, you can use \'print()\' to save whatever important information you will then need.\nThese print outputs will then appear in the \'Observation:\' field, which will be available as input for the next step.\nIn the end you have to return a final answer using the `final_answer` tool.\n\nYou must follow EXACTLY this format:\n\nThought:\n<code>\n# Python code here\n</code>\n\nRules:\n- ALWAYS use <code> and </code>\n- DO NOT use markdown code blocks\n- Use only valid Python\n\nCRITICAL:\n- If the answer requires external information (facts, data, current info), you MUST use a tool.\n- DO NOT guess or hallucinate.\n- DO NOT answer from memory if unsure.\n- Prefer using tools over guessing.\n– If a search result contains a useful link, you MUST use visit_webpage(url) to read it. Do not stop at search results.\n\nYou are NOT allowed to use requests, urllib, or any direct HTTP calls.\n\nAvailable tools:\n- duckduckgo_search(query: str)\n- visit_webpage(url: str)\n\nTo access web content, you MUST use:\n- web_search(query)\n- visit_webpage(url)\n\nAny other method is invalid.\n\nHere are the rules you should always follow to solve your task:\n1. Use only variables that you have defined!\n2. Always use the right arguments for the tools. 
DO NOT pass the arguments as a dict as in \'answer = wiki({\'query\': "What is the place where James Bond lives?"})\', but use the arguments directly as in \'answer = wiki(query="What is the place where James Bond lives?")\'.\n3. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.\n4. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.\n5. Don\'t name any new variable with the same name as a tool: for instance don\'t name a variable \'final_answer\'.\n6. Never create any notional variables in our code, as having these in your logs will derail you from the true variables.\n7. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}\n8. The state persists between code executions: so if in one step you\'ve created variables or imported modules, these will all persist.\n9. Don\'t give up! You\'re in charge of solving the task, not providing directions to solve it.\n\n\nExample:\n\nTask: What is the population of Paris?\n\nThought:\n<code>\nresult = web_search("Paris population")\nprint(result)\n</code>\n\nExample:\n\nTask: Who wrote the novel "1984"?\n\nThought:\n<code>\nresults = web_search("1984 novel author")\nprint(results)\n</code>\n\nThought:\n<code>\npage = visit_webpage(url=results[0])\nprint(page)\n</code>\n\nThought:\n<code>\nfinal_answer("George Orwell")\n</code>\n\nWhen the task is solved, return:\n<code>\nfinal_answer(result)\n</code>\n',
105
- 'planning': {'initial_facts': 'Below I will present you a task.\nYou will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need.\nTo do so, you will have to read the task and identify things that must be discovered in order to successfully complete it.\nDon\'t make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey:\n\n---\n### 1. Facts given in the task\nList here the specific facts given in the task that could help you (there might be nothing here).\n\n### 2. Facts to look up\nList here any facts that we may need to look up.\nAlso list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here.\n\n### 3. Facts to derive\nList here anything that we want to derive from the above by logical reasoning, for instance computation or simulation.\n\nKeep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings:\n### 1. Facts given in the task\n### 2. Facts to look up\n### 3. Facts to derive\nDo not add anything else.',
106
- 'initial_plan': "You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.\nNow for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the '\\n<end_plan>' tag and stop there.\n\nHere is your task:\n\nTask:\n```\n{{task}}\n```\nYou can leverage these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'request', a long string explaining your request.\nGiven that this team member is a real human, you should be very verbose in your request.\nHere is a list of the team members that you can call:\n{%- for agent in managed_agents.values() %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n{%- endif %}\n\nList of facts that you know:\n```\n{{answer_facts}}\n```\n\nNow begin! Write your plan below.",
107
- 'update_facts_pre_messages': 'You are a world expert at gathering known and unknown facts based on a conversation.\nBelow you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these:\n### 1. Facts given in the task\n### 2. Facts that we have learned\n### 3. Facts still to look up\n### 4. Facts still to derive\nFind the task and history below:',
108
- 'update_facts_post_messages': "Earlier we've built a list of facts.\nBut since in your previous steps you may have learned useful new facts or invalidated some false ones.\nPlease update your list of facts based on the previous history, and provide these headings:\n### 1. Facts given in the task\n### 2. Facts that we have learned\n### 3. Facts still to look up\n### 4. Facts still to derive\nNow write your new list of facts below.",
109
- 'update_plan_pre_messages': 'You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools.\nYou have been given a task:\n```\n{{task}}\n```\n\nFind below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task.\nIf the previous tries so far have met some success, you can make an updated plan based on these actions.\nIf you are stalled, you can make a completely new plan starting from scratch.',
110
- 'update_plan_post_messages': "You're still working towards solving this task:\n```\n{{task}}\n```\nYou can leverage these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents and managed_agents.values() | list %}\nYou can also give tasks to team members.\nCalling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'.\nGiven that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary.\nHere is a list of the team members that you can call:\n{%- for agent in managed_agents.values() %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n{%- endif %}\n\nHere is the up to date list of facts that you know:\n```\n{{facts_update}}\n```\n\nNow for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts.\nThis plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the '\\n<end_plan>' tag and stop there.\n\nNow write your new plan below."},
111
- 'managed_agent': {'task': "You're a helpful agent named '{{name}}'.\nYou have been submitted this task by your manager.\n---\nTask:\n{{task}}\n---\nYou're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.\nYour final_answer WILL HAVE to contain these parts:\n### 1. Task outcome (short version):\n### 2. Task outcome (extremely detailed version):\n### 3. Additional context (if relevant):\n\nPut all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost.\nAnd even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.",
112
- 'report': "Here is the final answer from your managed agent '{{name}}':\n{{final_answer}}"},
113
- 'final_answer': {'pre_messages': 'Return the final answer as a SINGLE value.\n\nIf multiple items are required:\n- return a comma-separated string\n- do NOT return a list\n\nExamples:\n- "a,b,c"\n- "42"\n- "3.14"\n',
114
- 'post_messages': 'Write:\n\n<code>\nfinal_answer(result)\n</code>\n\nWhere result is:\n- a string\n- or a number\n- NEVER a list or array\n'}}
115
-
116
- self.web_search = DuckDuckGoSearchTool()
117
- # self.visit_webpage = VisitWebpageTool()
118
-
119
- self.agent = CodeAgent(
120
- model=self.model,
121
- tools=[self.web_search, visit_webpage,],
122
- max_steps=5,
123
- verbosity_level=1,
124
- additional_authorized_imports=["json", "pandas", "wiki", 'random', 'time', 'itertools', 'statistics', 'queue', 'math', 'collections', 'datetime', 'unicodedata', 're', 'stat'],
125
- prompt_templates=self.prompt_templates,
126
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  print("BasicAgent initialized.")
129
  def __call__(self, question: str) -> str:
@@ -134,8 +693,13 @@ class BasicAgent:
134
 
135
  # if question == "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.":
136
  # if " image " not in question and " video " not in question:
137
- if question == "ssWho nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?":
138
- agent_answer = self.agent.run(question)
 
 
 
 
 
139
  else:
140
  agent_answer = fixed_answer
141
  # agent_answer = self.agent.run(question)
 
1
+ import re
2
+ import warnings
3
+ warnings.filterwarnings("ignore")
4
+ import json
5
+ import logging
6
+ from typing import TypedDict, Annotated, Dict, Any
7
+ from json_repair import repair_json
 
 
 
 
 
 
 
 
8
  import requests
9
  from bs4 import BeautifulSoup
10
+ from pydantic import BaseModel, Field
11
+ from typing import Dict
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM
13
+ import torch
14
+ from langgraph.graph import StateGraph, START, END
15
+ from langgraph.graph.message import add_messages
16
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
17
+ from IPython.display import display, Markdown, Image
18
+ from langchain_community.retrievers import BM25Retriever
19
+ from langchain_core.tools import Tool
20
+ from langchain_core.documents import Document
21
+ from langgraph.prebuilt import ToolNode, tools_condition
22
+ from sentence_transformers import SentenceTransformer
23
+ from sklearn.metrics.pairwise import cosine_similarity
24
+ # from langchain.agents import create_tool_calling_agent
25
 
26
  # (Keep Constants as is)
27
  # --- Constants ---
28
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
29
 
30
# Sentence-embedding model used for semantic similarity comparisons
# (cosine_similarity over embeddings; see sklearn import above).
sentence_transformer_model = SentenceTransformer("all-mpnet-base-v2")

# Module-level logger for the agent; INFO level so node traces are visible.
logger = logging.getLogger("agent")
logging.basicConfig(level=logging.INFO)
34
+
35
class Config(object):
    """Hyper-parameters and model identifiers for the agent's LLMs."""

    def __init__(self):
        # All settings gathered in one place, then bound as attributes.
        settings = {
            "random_state": 42,            # seed for reproducibility
            "max_len": 256,                # max new tokens for the main model
            "reasoning_max_len": 128,      # max new tokens for the reasoning model
            "temperature": 0.1,            # low temperature -> near-deterministic output
            # Run on GPU whenever one is available.
            "DEVICE": "cuda" if torch.cuda.is_available() else "cpu",
            "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
            # Alternatives previously tried for reasoning:
            #   deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
            #   Qwen/Qwen2.5-7B-Instruct
            "reasoning_model_name": "mistralai/Mistral-7B-Instruct-v0.2",
        }
        for name, value in settings.items():
            setattr(self, name, value)
46
+
47
+
48
# Shared configuration instance used throughout the module.
config = Config()

# Primary generation model + tokenizer (used by `generate`).
tokenizer = AutoTokenizer.from_pretrained(config.model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Reasoning model + tokenizer (used by `reasoning_generate`).
# NOTE(review): reasoning_model_name currently equals model_name, so this
# loads a second copy of the same weights — consider reusing `model` to
# halve memory use; confirm before changing.
reasoning_tokenizer = AutoTokenizer.from_pretrained(config.reasoning_model_name)
reasoning_model = AutoModelForCausalLM.from_pretrained(
    config.reasoning_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
63
+
64
def generate(prompt):
    """
    Generate a text completion from the primary causal language model.

    Parameters
    ----------
    prompt : str
        Input text prompt used to condition the language model.

    Returns
    -------
    str
        The generated continuation text, decoded with special tokens
        removed and leading/trailing whitespace stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[-1]

    # Inference only — no gradients needed.
    with torch.no_grad():
        token_ids = model.generate(
            **encoded,
            max_new_tokens=config.max_len,
            temperature=config.temperature,
        )

    # Slice off the prompt tokens so only the continuation is decoded.
    completion_ids = token_ids[0][prompt_len:]
    text = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return text.strip()
92
+
93
def reasoning_generate(prompt):
    """
    Generate a text completion from the reasoning causal language model.

    Parameters
    ----------
    prompt : str
        Input text prompt used to condition the language model.

    Returns
    -------
    str
        The generated continuation text, decoded into a string with special
        tokens removed and leading/trailing whitespace stripped.
    """
    # BUG FIX: move the encoded inputs to the *reasoning* model's device.
    # Previously this used `model.device`, which crashes with a device
    # mismatch whenever the two models are placed on different devices.
    inputs = reasoning_tokenizer(prompt, return_tensors="pt").to(reasoning_model.device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = reasoning_model.generate(
            **inputs,
            max_new_tokens=config.reasoning_max_len,
            temperature=config.temperature,
        )

    # Slice off the prompt tokens so only the continuation is decoded.
    generated = outputs[0][inputs["input_ids"].shape[-1]:]

    return reasoning_tokenizer.decode(generated, skip_special_tokens=True).strip()
121
 
122
class Action(BaseModel):
    """A single tool call proposed by the planner: which tool and its arguments."""

    tool: str = Field(...)  # required tool name, e.g. "web_search" — should be a key of ALLOWED_TOOLS
    args: Dict  # keyword arguments to pass to the tool
 
 
 
125
 
126
+ # Generate the AgentState and Agent graph
127
+ class AgentState(TypedDict):
128
+ messages: Annotated[list[AnyMessage], add_messages]
129
+ proposed_action: str
130
+ information: str
131
+ output: str
132
+ confidence: float
133
+ judge_explanation: str
134
 
135
# Registry of every tool: tool name -> list of accepted argument names.
ALL_TOOLS = {
    "web_search": ["query"],
    "visit_webpage": ["url"],
}

# Whitelist used for authorization; currently identical to ALL_TOOLS.
ALLOWED_TOOLS = {
    "web_search": ["query"],
    "visit_webpage": ["url"],
}
144
 
145
 
 
146
  def visit_webpage(url: str) -> str:
147
  """
148
  Fetch and read the content of a webpage.
 
151
  Returns:
152
  Extracted readable text (truncated)
153
  """
154
+
155
  headers = {
156
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"
157
  }
 
164
  paragraphs = [p.get_text() for p in soup.find_all("p")]
165
  text = "\n".join(paragraphs)
166
 
167
+ return (text[:500], text[500:1000])
168
 
169
 
170
def web_search(query: str, num_results: int = 10):
    """
    Search the internet for the query provided.

    Args:
        query: Query to search in the internet.
        num_results: Maximum number of result links to return (default 10).

    Returns:
        list of urls (may contain fewer than ``num_results`` entries).

    Raises:
        requests.HTTPError: if DuckDuckGo returns a non-2xx response.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = "https://html.duckduckgo.com/html/"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    # timeout prevents the agent from hanging indefinitely on a slow endpoint;
    # raise_for_status surfaces HTTP errors instead of silently parsing an
    # error page into an empty result list.
    response = requests.post(url, data={"q": query}, headers=headers, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): DuckDuckGo result anchors are often redirect links
    # ("//duckduckgo.com/l/?uddg=..."); confirm downstream visit_webpage
    # callers handle scheme-relative/redirect URLs.
    return [a.get("href") for a in soup.select(".result__a")[:num_results]]
188
 
189
def planner_node(state: AgentState):
    """
    Planning node for a tool-using LLM agent.

    Reads the latest user message from ``state["messages"]``, prompts the
    language model to pick exactly one tool call as strict JSON, repairs and
    parses the model output, and stores the result in
    ``state["proposed_action"]`` in the form::

        {"tool": "<tool_name>", "args": {...}}

    Parameters
    ----------
    state : AgentState
        Agent state dictionary; only "messages" is read here.

    Returns
    -------
    AgentState
        The same state with "proposed_action" set to the parsed tool call.
        (No other keys are written by this node.)

    Security Notes
    --------------
    - This node does not enforce tool-level authorization and does not
      validate hallucinated tools or their arguments; downstream nodes must
      implement tool whitelist validation, argument validation, and
      execution authorization.
    - NOTE(review): the prompt hard-codes one concrete example user request
      (the Wikipedia dinosaur question) — confirm this does not bias the
      planner's queries on unrelated tasks.
    """

    # Latest user message is the planning target.
    user_input = state["messages"][-1].content

    prompt = f"""
    You are a planning agent.

    You MUST return ONLY valid JSON as per the tools specs below ONLY.
    No extra text.
    DO NOT invent anything additional beyond the user request provided. Keep it strict to the user request information provided. The question and the query should be fully relevant to the user request provided, no deviation and hallucination. If possible and makes sense then the query should be exactly the user request.

    The available tools and their respective arguments are: {{
        "web_search": ["query"],
        "visit_webpage": ["url"],
    }}

    Return exactly the following format:
    Response:
    {{
        "tool": "...",
        "args": {{...}}
    }}

    User request: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?. Example of valid JSON expected:
    Response:
    {{"tool": "web_search",
    "args": {{"query": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
    }}
    }}

    Return only one Response!

    User request:
    {user_input}
    """

    output = generate(prompt)

    # Keep only the text after the last "Response:" marker, repair any
    # malformed JSON with repair_json, then parse it into a dict.
    state["proposed_action"] = output.split("Response:")[-1]
    fixed = repair_json(state["proposed_action"])
    data = json.loads(fixed)
    state["proposed_action"] = data

    return state
288
+
289
def planner_node(state: AgentState):
    """
    Planning node for a tool-using LLM agent.

    NOTE(review): this is a byte-identical duplicate of the earlier
    ``planner_node`` definition in this file; Python keeps only this second
    binding, silently shadowing the first. One of the two copies should be
    deleted.

    Reads the latest user message from ``state["messages"]``, prompts the
    language model to pick exactly one tool call as strict JSON, repairs and
    parses the model output, and stores the result in
    ``state["proposed_action"]`` in the form::

        {"tool": "<tool_name>", "args": {...}}

    Parameters
    ----------
    state : AgentState
        Agent state dictionary; only "messages" is read here.

    Returns
    -------
    AgentState
        The same state with "proposed_action" set to the parsed tool call.
        (No other keys are written by this node.)

    Security Notes
    --------------
    - This node does not enforce tool-level authorization and does not
      validate hallucinated tools or their arguments; downstream nodes must
      implement tool whitelist validation, argument validation, and
      execution authorization.
    """

    # Latest user message is the planning target.
    user_input = state["messages"][-1].content

    prompt = f"""
    You are a planning agent.

    You MUST return ONLY valid JSON as per the tools specs below ONLY.
    No extra text.
    DO NOT invent anything additional beyond the user request provided. Keep it strict to the user request information provided. The question and the query should be fully relevant to the user request provided, no deviation and hallucination. If possible and makes sense then the query should be exactly the user request.

    The available tools and their respective arguments are: {{
        "web_search": ["query"],
        "visit_webpage": ["url"],
    }}

    Return exactly the following format:
    Response:
    {{
        "tool": "...",
        "args": {{...}}
    }}

    User request: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?. Example of valid JSON expected:
    Response:
    {{"tool": "web_search",
    "args": {{"query": "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
    }}
    }}

    Return only one Response!

    User request:
    {user_input}
    """

    output = generate(prompt)

    # Keep only the text after the last "Response:" marker, repair any
    # malformed JSON with repair_json, then parse it into a dict.
    state["proposed_action"] = output.split("Response:")[-1]
    fixed = repair_json(state["proposed_action"])
    data = json.loads(fixed)
    state["proposed_action"] = data

    return state
388
+
389
def safety_node(state: AgentState):
    """
    Evaluate the information provided and output the response for the user request.

    Prompts the reasoning model to emit a single ``Response: <answer>`` line
    from the user's request plus the gathered ``state["information"]``, then
    normalizes the answer (strips one pair of surrounding double quotes and a
    single trailing period) and stores it in ``state["output"]``.
    """

    user_input = state["messages"][-1].content
    information = state["information"]

    prompt = f"""
    You are a response agent.

    You must reason over the user request and the provided information and output the answer to the user's request.

    You MUST return EXACTLY one line in the following format:
    Response: <answer>

    DO NOT invent anything additional and return only what is asked and in the format asked.

    Only return a response if you are confident about the answer, otherwise return empty string.

    Example of valid json response for user request: Who was the winner of 2025 World Snooker Championship:
    Response: Zhao Xintong.

    Return exactly the above requested format and nothing more!
    DO NOT generate any additional text after it!

    User request:
    {user_input}

    Information:
    {information}
    """

    raw_output = reasoning_generate(prompt)
    # raw_output = generate(prompt)

    logger.info(f"Raw Output: {raw_output}")

    # Keep only the text after the last "Response:" marker.
    output = raw_output.split("Response:")[-1].strip()
    # match = re.search(r"Response:\s*(.*)", raw_output, re.IGNORECASE)
    # output = match.group(1).strip() if match else ""

    # Strip one pair of surrounding double quotes, if present.
    if len(output) > 2 and output[0] == '"' and output[-1] == '"':
        output = output[1:-1]

    # Drop a single trailing period to normalize the final answer.
    # NOTE(review): this also truncates answers that legitimately end in "."
    # (e.g. abbreviations) — confirm this is the intended scoring format.
    if len(output) > 2 and output[-1] == '.':
        output = output[:-1]

    state["output"] = output

    logger.info(f"State (Safety Agent): {state}")

    return state
442
+
443
+
444
def Judge(state: AgentState):
    """
    Judge node: score how well the generated answer is grounded in the
    retrieved information.

    Reads from ``state``:
        - "output": the candidate answer produced upstream.
        - "information": the retrieved web/context text.
        - "messages": the last message's ``content`` is the user request.

    Writes to ``state``:
        - "confidence" (float): grounding confidence, 0.0–1.0.
        - "judge_explanation" (str): the model's justification.

    Returns
    -------
    AgentState
        The updated state.
    """
    answer = state["output"]
    information = state["information"]
    user_input = state["messages"][-1].content

    # Few-shot prompt. The example "Response" objects are valid JSON
    # (explanation values are quoted) so the model is less likely to emit
    # output that needs repair before parsing.
    prompt = f"""
    You are a Judging agent.

    You must reason over the user request and judge with a confidence score whether the answer is indeed based on the provided information or not.

    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information: Zhao Xintong won the 2025 World Snooker Championship with a dominant 18-12 final victory over Mark Williams in Sheffield on Monday. The 28 year-old becomes the first player from China to win snooker’s premier prize at the Crucible Theatre.
    Zhao, who collects a top prize worth £500,000, additionally becomes the first player under amateur status to go all the way to victory in a World Snooker Championship.
    The former UK champion entered the competition in the very first qualifying round at the English Institute of Sport last month.
    He compiled a dozen century breaks as he fought his way through four preliminary rounds in fantastic fashion to qualify for the Crucible for the third time in his career.
    In the final round of the qualifiers known as Judgement Day, Zhao edged Elliot Slessor 10-8 in a high-quality affair during which both players made a hat-trick of tons.
    Ironically, that probably represented his sternest test throughout the entire event.
    Answer: "Zhao Xintong"

    Response: {{
        "confidence": 1.0,
        "explanation": "Based on the information provided, it is indeed mentioned that Zhao Xintong, which is the answer provided, won the 2025 World Snooker Championship."
    }}


    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information: Zhao Xintong won the 2025 World Snooker Championship with a dominant 18-12 final victory over Mark Williams in Sheffield on Monday. The 28 year-old becomes the first player from China to win snooker’s premier prize at the Crucible Theatre.
    Zhao, who collects a top prize worth £500,000, additionally becomes the first player under amateur status to go all the way to victory in a World Snooker Championship.
    The former UK champion entered the competition in the very first qualifying round at the English Institute of Sport last month.
    He compiled a dozen century breaks as he fought his way through four preliminary rounds in fantastic fashion to qualify for the Crucible for the third time in his career.
    In the final round of the qualifiers known as Judgement Day, Zhao edged Elliot Slessor 10-8 in a high-quality affair during which both players made a hat-trick of tons.
    Ironically, that probably represented his sternest test throughout the entire event.
    Answer: "Ronnie O'sullivan"

    Response: {{
        "confidence": 0.0,
        "explanation": "Based on the information provided, it was Zhao Xintong and not Ronnie O'sullivan who won the 2025 World Snooker Championship."
    }}

    Example: User request: Who was the winner of 2025 World Snooker Championship?
    Information:
    Answer: "Ronnie O'sullivan"

    Response: {{
        "confidence": 0.0,
        "explanation": "There is no information provided, so cannot answer who won the 2025 World Snooker Championship."
    }}

    Return exactly the above requested format and nothing more!
    DO NOT generate any additional text after it!
    Return only what is asked and in the format asked!

    User request:
    {user_input}

    Information:
    {information}

    Answer:
    {answer}
    """

    raw_output = generate(prompt)

    # Keep all diagnostics on the module logger (was a stray print()).
    logger.info(f"Judge raw output: {raw_output}")

    output = raw_output.split("Response:")[-1].strip()

    try:
        fixed = repair_json(output)
        data = json.loads(fixed)
        state["confidence"] = float(data["confidence"])
        state["judge_explanation"] = data["explanation"]
    except (ValueError, TypeError, KeyError) as e:
        # Fail closed: if the judge output cannot be parsed we cannot trust
        # the answer, so report zero confidence instead of crashing the graph.
        logger.info(f"Judge output could not be parsed: {e}")
        state["confidence"] = 0.0
        state["judge_explanation"] = "Judge output could not be parsed."

    logger.info(f"State (Judge Agent): {state}")

    return state
524
+
525
+
526
def route(state: AgentState):
    """Route after the safety node: 'block' when risk exceeds 0.5, else 'allow'."""
    return "block" if state["risk_score"] > 0.5 else "allow"
532
+
533
def tool_executor(state: AgentState):
    """
    Execute the tool call proposed by the planner and store gathered web
    information on the agent state.

    Supported tools (dispatched on ``state["proposed_action"]``, validated
    through the ``Action`` schema):
        - "web_search": run a web search (overriding the planner's query with
          the raw user request when their embedding similarity is <= 0.80),
          visit each result page, and keep the text chunks whose cosine
          similarity to the user query exceeds 0.60.
        - "visit_webpage": fetch a single page.

    Writes to ``state``:
        - "information" (str): concatenation of relevant page chunks
          (empty for the "visit_webpage" branch).
        - "best_query_webpage_information_similarity_score" (float): best
          per-chunk similarity seen, -1.0 when nothing was scored.

    Returns
    -------
    AgentState
        The updated state.

    NOTE(review): in the "visit_webpage" branch the fetched text is assigned
    to a local but never copied into state["information"] — confirm whether
    that is intended.
    NOTE(review): visit_webpage's signature is annotated to return str, yet
    the web_search branch iterates its result as a sequence of text chunks —
    confirm the actual return type.
    """

    web_page_result = ""
    action = Action.model_validate(state["proposed_action"])

    best_score = -1.0
    webpage_information_complete = ""

    if action.tool == "web_search":
        logger.info(f"action.tool: {action.tool}")

        user_query = state["messages"][-1].content
        # Hoisted: the user-query embedding is invariant across all pages
        # and chunks scored below.
        query_embeddings = sentence_transformer_model.encode_query(user_query).reshape(1, -1)

        # Pulled into a local first — nesting state["..."] inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        proposed_query = state["proposed_action"]["args"]["query"]
        query_arg_embeddings = sentence_transformer_model.encode_query(proposed_query).reshape(1, -1)
        score = float(cosine_similarity(query_embeddings, query_arg_embeddings)[0][0])

        if score > 0.80:
            results = web_search(**action.args)
        else:
            # The planner's query drifted from the user's intent; fall back
            # to searching with the raw user request.
            logger.info(f"Overwriting user query because the Agent suggested query had score: {proposed_query} - {score}")
            results = web_search(**{"query": user_query})

        logger.info(f"Webpages - Results: {results}")

        for result in results:
            try:
                web_page_results = visit_webpage(result)

                for web_page_result in web_page_results:
                    chunk_embeddings = sentence_transformer_model.encode_query(web_page_result).reshape(1, -1)
                    chunk_score = float(cosine_similarity(query_embeddings, chunk_embeddings)[0][0])

                    # Keep only chunks that are sufficiently on-topic.
                    if chunk_score > 0.60:
                        webpage_information_complete += web_page_result
                        webpage_information_complete += " \n "
                        webpage_information_complete += " \n "

                    if chunk_score > best_score:
                        best_score = chunk_score

            except Exception as e:
                # Best-effort scraping: one bad page must not abort the search.
                logger.info(f"Tool Executor - Exception: {e}")

    elif action.tool == "visit_webpage":
        try:
            web_page_result = visit_webpage(**action.args)
        except Exception as e:
            # Preserve the original best-effort behaviour, but record the failure
            # instead of swallowing it with a bare except.
            logger.info(f"Tool Executor - visit_webpage failed: {e}")
    else:
        logger.info(f"Unknown tool requested: {action.tool}")

    state["information"] = webpage_information_complete
    state["best_query_webpage_information_similarity_score"] = best_score

    logger.info(f"Information: {state['information']}")
    logger.info(f"Information: {state['best_query_webpage_information_similarity_score']}")

    return state
652
+
653
# Build the agent graph. The compiled pipeline is linear:
#   START -> planner -> tool_executor -> safety
# NOTE(review): no edge from "safety" to END is added here, and the
# conditional routing via route() is commented out — confirm the graph
# terminates as intended before relying on it.
safe_workflow = StateGraph(AgentState)
# safe_workflow = StateGraph(dict)

# Register the three active nodes; the judge node is currently disabled.
safe_workflow.add_node("planner", planner_node)
safe_workflow.add_node("tool_executor", tool_executor)
safe_workflow.add_node("safety", safety_node)
# safe_workflow.add_node("judge", Judge)

# safe_workflow.set_entry_point("planner")

# Wire the linear flow; add_edge(START, ...) replaces set_entry_point above.
safe_workflow.add_edge(START, "planner")
safe_workflow.add_edge("planner", "tool_executor")
safe_workflow.add_edge("tool_executor", "safety")
# safe_workflow.add_edge("safety", "judge")
# safe_workflow.add_conditional_edges(
#     "safety",
#     route,
#     {
#         "allow": "tool_executor",
#         "block": END,
#     },
# )
# safe_workflow.add_edge("tool_executor", END)

# Compilation is deferred to BasicAgent.__init__ rather than done at import time.
# safe_app = safe_workflow.compile()
678
+
679
+
680
+
681
+ # --- Basic Agent Definition ---
682
+ # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
683
+ class BasicAgent:
684
+ def __init__(self):
685
+ self.safe_app = safe_workflow.compile()
686
 
687
  print("BasicAgent initialized.")
688
  def __call__(self, question: str) -> str:
 
693
 
694
  # if question == "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.":
695
  # if " image " not in question and " video " not in question:
696
+ if question == "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?":
697
+ state = {
698
+ "messages": question,
699
+ }
700
+ response = self.safe_app.invoke(state)
701
+
702
+ agent_answer = response["output"]
703
  else:
704
  agent_answer = fixed_answer
705
  # agent_answer = self.agent.run(question)