ajaxwin committed on
Commit
e8c9acc
·
1 Parent(s): 49df6d3

separated actions, prompts

Browse files
data/data_loader.py CHANGED
@@ -156,6 +156,8 @@ def get_related_functions(
156
  return sorted(related)
157
 
158
 
 
 
159
  def get_similar_rule(
160
  contracts: List[Dict[str, Any]],
161
  current_contract_name: str,
@@ -172,7 +174,7 @@ def get_similar_rule(
172
  for contract in contracts:
173
  if contract["contract_name"] == current_contract_name:
174
  fn = get_function_by_name(contract, current_function_name)
175
- if fn and fn.get("property") and fn["property"].get("similar_rule"):
176
  sr = fn["property"]["similar_rule"]
177
  # Look up the referenced function's natspec
178
  for c2 in contracts:
 
156
  return sorted(related)
157
 
158
 
159
+ # ! Function is completely wrong
160
+
161
  def get_similar_rule(
162
  contracts: List[Dict[str, Any]],
163
  current_contract_name: str,
 
174
  for contract in contracts:
175
  if contract["contract_name"] == current_contract_name:
176
  fn = get_function_by_name(contract, current_function_name)
177
+ if fn and fn.get("property") and fn["property"].get("similar_rule"): # ! There is no property or similar_rule field
178
  sr = fn["property"]["similar_rule"]
179
  # Look up the referenced function's natspec
180
  for c2 in contracts:
inference.py CHANGED
@@ -32,6 +32,7 @@ from tasks.task1.environment import Task1Environment
32
  from tasks.task2.environment import Task2Environment
33
  from tasks.task3.environment import Task3Environment
34
  from env.schemas import Action, ActionType
 
35
 
36
  # ─────────────────────────────────────────────────────────────────────────────
37
  # Configuration
@@ -56,28 +57,6 @@ client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)
56
  # Task 1 agent
57
  # ─────────────────────────────────────────────────────────────────────────────
58
 
59
- T1_SYSTEM = """You are an expert Solidity smart contract security auditor.
60
-
61
- Given a contract, identify the ONE vulnerable function and its vulnerability type.
62
-
63
- ## Actions (choose ONE per turn, respond with JSON only):
64
- {"action": "list_functions", "params": {}}
65
- {"action": "get_function_code", "params": {"function_name": "<name>"}}
66
- {"action": "get_function_summary", "params": {"function_name": "<name>"}}
67
- {"action": "get_file_metadata", "params": {}}
68
- {"action": "get_state_variable", "params": {"variable_name": "<name>"}}
69
- {"action": "get_call_graph", "params": {}}
70
- {"action": "submit", "params": {"function_name": "<name>", "vulnerability_type": "<2-3 words>"}}
71
-
72
- ## Strategy:
73
- 1. list_functions first to see the attack surface
74
- 2. Inspect suspicious functions (withdraw, drain, buy, stake, claim, setPrice, bid, finalize)
75
- 3. Look for: reentrancy, missing access control, integer overflow, tx.origin, front-running,
76
- timestamp dependence, denial of service, unchecked return value
77
- 4. Submit when confident
78
-
79
- Respond ONLY with valid JSON. No explanation, no markdown."""
80
-
81
 
82
  def _t1_user_msg(obs: Dict[str, Any]) -> str:
83
  return (
@@ -138,36 +117,6 @@ def run_t1_episode(env: Task1Environment, seed: int, ep: int) -> Dict[str, Any]:
138
  # Task 2 agent
139
  # ─────────────────────────────────────────────────────────────────────────────
140
 
141
- T2_SYSTEM = """You are a formal methods engineer specialising in Solidity smart contracts.
142
-
143
- You will be shown a specific Solidity function. Your task is to write a precise
144
- natural-language property (invariant / postcondition) that describes what the
145
- function guarantees when it succeeds.
146
-
147
- A good property covers:
148
- - What state changes (balances, counters, flags)
149
- - What assets are transferred (ETH, tokens, NFTs)
150
- - What return value is produced (for view functions)
151
- - Under what conditions it reverts
152
-
153
- ## Actions (respond with JSON only, ONE action per turn):
154
- {"action": "get_function_code", "params": {}}
155
- {"action": "get_function_natspec", "params": {}}
156
- {"action": "get_file_natspec", "params": {}}
157
- {"action": "get_related_functions", "params": {}}
158
- {"action": "get_io", "params": {}}
159
- {"action": "get_similar_rule", "params": {}}
160
- {"action": "submit_property", "params": {"property": "<your full property text>"}}
161
-
162
- ## Rules:
163
- - You have ONE submit_property attempt. Make it count.
164
- - Use get_function_natspec and get_io first — they give the most signal.
165
- - get_similar_rule costs more (-0.20) but shows a parallel property from another contract.
166
- - Write 2–4 sentences. Be specific about variable names and amounts.
167
- - Do NOT guess — read the code first.
168
-
169
- Respond ONLY with valid JSON. No markdown, no explanation."""
170
-
171
 
172
  def _t2_user_msg(obs: Dict[str, Any]) -> str:
173
  extra = obs.get("extra", {})
@@ -262,33 +211,6 @@ def run_task2(n: int = NUM_EPISODES) -> Dict[str, Any]:
262
  "avg_grader_score": avg_s, "avg_cumulative_reward": avg_r}
263
 
264
 
265
- T3_SYSTEM = """You are a smart contract security auditor checking rule compliance.
266
-
267
- You are given a Solidity contract and a property (rule) in natural English.
268
- Your task is to find the ONE function that violates this property.
269
-
270
- ## Actions (respond with JSON only, ONE action per turn):
271
- {"action": "list_functions", "params": {}}
272
- {"action": "get_formalized_property", "params": {}}
273
- {"action": "get_function_metadata", "params": {"function_name": "<n>"}}
274
- {"action": "get_function_code", "params": {"function_name": "<n>"}}
275
- {"action": "get_state_variable", "params": {"variable_name": "<n>"}}
276
- {"action": "get_call_graph", "params": {}}
277
- {"action": "submit_function", "params": {"function_name": "<n>"}}
278
-
279
- ## Strategy:
280
- 1. Read the property shown as property_english in the observation.
281
- 2. list_functions to survey candidates.
282
- 3. get_formalized_property for the precise pre/post-condition (cheap: -0.03).
283
- 4. get_function_code on the 1-2 most suspicious functions.
284
- 5. submit_function when confident — ONE attempt only.
285
-
286
- Clues: missing require, no access modifier, unchecked external call, unbounded array,
287
- tx.origin auth, integer overflow, timestamp manipulation, reentrancy ordering.
288
-
289
- Respond ONLY with valid JSON. No markdown, no explanation."""
290
-
291
-
292
  def _t3_user_msg(obs: Dict[str, Any]) -> str:
293
  extra = obs.get("extra", {})
294
  return (
 
32
  from tasks.task2.environment import Task2Environment
33
  from tasks.task3.environment import Task3Environment
34
  from env.schemas import Action, ActionType
35
+ from utils.prompts import T1_SYSTEM, T2_SYSTEM, T3_SYSTEM
36
 
37
  # ─────────────────────────────────────────────────────────────────────────────
38
  # Configuration
 
57
  # Task 1 agent
58
  # ─────────────────────────────────────────────────────────────────────────────
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def _t1_user_msg(obs: Dict[str, Any]) -> str:
62
  return (
 
117
  # Task 2 agent
118
  # ─────────────────────────────────────────────────────────────────────────────
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  def _t2_user_msg(obs: Dict[str, Any]) -> str:
122
  extra = obs.get("extra", {})
 
211
  "avg_grader_score": avg_s, "avg_cumulative_reward": avg_r}
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def _t3_user_msg(obs: Dict[str, Any]) -> str:
215
  extra = obs.get("extra", {})
216
  return (
tasks/task2/actions.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Actions for Task 2: Property Inference.
3
+ """
4
+
5
+ from typing import Any, Dict, Tuple
6
+ from data.data_loader import get_function_by_name, get_related_functions, get_similar_rule
7
+ from env.schemas import ActionType, Reward
8
+
9
def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the code of the episode's target function.

    Args:
        ctx: Environment object exposing ``_is_repeated`` and ``_target_fn``.
        qkey: Query key passed to ``ctx._is_repeated``.
        params: Action parameters (not read by this handler).

    Returns:
        ``(text, reward)``: the function name as a ``//`` header followed by
        its code, with a -0.06 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    target = ctx._target_fn
    fn_name = target["name"]
    source = target.get("code", "// no code available")
    text = f"// {fn_name}\n{source}"
    return text, Reward(value=-0.06, reason="get_function_code cost")
21
+
22
+
23
def get_function_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the NatSpec text recorded for the target function.

    The text is taken from the ``natspec`` field, then ``comment``, then a
    fixed placeholder. A non-empty ``output_property`` is appended as an
    "Expected output" paragraph.

    Returns:
        ``(text, reward)`` with a -0.08 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    target = ctx._target_fn
    fn_name = target["name"]
    doc_text = target.get("natspec") or target.get("comment") or "No NatSpec available."
    sections = [f"NatSpec for '{fn_name}':\n{doc_text}"]

    expected = target.get("output_property", "")
    if expected:
        sections.append(f"Expected output: {expected}")

    # Paragraphs are separated by one blank line.
    return "\n\n".join(sections), Reward(value=-0.08, reason="get_function_natspec cost")
36
+
37
+
38
def get_file_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the contract-level NatSpec stored in the contract metadata.

    The text is taken from ``metadata["natspec"]``, then
    ``metadata["description"]``, then a fixed placeholder. Each step falls
    through on any falsy value, so an empty or ``None`` description no longer
    produces a blank answer (same ``or`` chain as ``get_function_natspec``).

    Args:
        ctx: Environment object exposing ``_is_repeated`` and ``_contract``.
        qkey: Query key passed to ``ctx._is_repeated``.
        params: Action parameters (not read by this handler).

    Returns:
        ``(text, reward)`` with a -0.03 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    # ``or {}`` also covers a metadata key that is present but None.
    meta = ctx._contract.get("metadata") or {}
    natspec = (
        meta.get("natspec")
        or meta.get("description")
        or "No file NatSpec available."
    )
    return (
        f"File NatSpec for {ctx._contract['contract_name']}:\n{natspec}",
        Reward(value=-0.03, reason="get_file_natspec cost"),
    )
49
+
50
+
51
def get_related_functions_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """List the functions related to the target function, one per line.

    Names come from ``get_related_functions``; each is resolved with
    ``get_function_by_name`` and rendered as its signature and comment.
    Names that do not resolve are skipped. If nothing resolves, the
    "no related functions" message is returned instead of a bare header.

    Args:
        ctx: Environment object exposing ``_is_repeated``, ``_target_fn``
            and ``_contract``.
        qkey: Query key passed to ``ctx._is_repeated``.
        params: Action parameters (not read by this handler).

    Returns:
        ``(text, reward)`` with a -0.06 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    name = ctx._target_fn["name"]
    related = get_related_functions(ctx._contract, name)

    summaries = []
    for rn in related or ():
        rfn = get_function_by_name(ctx._contract, rn)
        if rfn:
            sig = rfn.get("signature", rn)
            comment = rfn.get("comment", "")
            summaries.append(f" • {sig} — {comment}")

    if summaries:
        text = f"Related functions for '{name}':\n" + "\n".join(summaries)
    else:
        text = f"No related functions found for '{name}'."
    return text, Reward(value=-0.06, reason="get_related_functions cost")
70
+
71
+
72
def get_io(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Describe the target function's interface as a multi-line report.

    The report lists the signature, visibility (plus modifiers when any are
    recorded), each parameter or a "none" line, the return type ("void" when
    empty), and the ``output_property`` text when present.

    Returns:
        ``(text, reward)`` with a -0.04 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    target = ctx._target_fn
    fn_name = target["name"]

    visibility_line = f"Visibility: {target.get('visibility', '')}"
    mods = target.get("modifiers", [])
    if mods:
        visibility_line += f" Modifiers: {', '.join(mods)}"

    report = [f"Function: {target.get('signature', fn_name)}", visibility_line]

    arguments = target.get("parameters", [])
    if arguments:
        report.append("Parameters:")
        report.extend(
            f" • {arg['type']} {arg['name']}: {arg.get('description','')}"
            for arg in arguments
        )
    elif "payable" in target.get("code", ""):
        # Substring check on the code text, not a parsed modifier.
        report.append("Parameters: none (payable)")
    else:
        report.append("Parameters: none")

    return_type = target.get("returns", "") or "void"
    report.append(f"Returns: {return_type}")

    expected = target.get("output_property", "")
    if expected:
        report.append(f"Expected behaviour: {expected}")

    return "\n".join(report), Reward(value=-0.04, reason="get_io cost")
97
+
98
+ # ! Wrong function: there is no similar_rule field in the dataset, so this function will always return "No similar rule available for this function."
99
+
100
def get_similar_rule_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the result of ``get_similar_rule`` for the target function.

    ``get_similar_rule`` is called with all contracts, the current contract
    name and the target function name. The -0.20 cost is charged whether or
    not a rule is found.

    Returns:
        ``(text, reward)``: the referenced contract/function, its property
        hint and, when present, its NatSpec; a "not available" message when
        the lookup returns ``None``; or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    contract_name = ctx._contract["contract_name"]
    target_name = ctx._target_fn["name"]
    match = get_similar_rule(ctx._contracts, contract_name, target_name)

    if match is None:
        not_found = Reward(value=-0.20, reason="get_similar_rule cost (not found)")
        return "No similar rule available for this function.", not_found

    text = (
        f"Similar property from {match['contract_name']}.{match['function_name']}():"
        f"\n {match['property_hint']}"
    )
    if match.get("natspec"):
        # Blank line before the NatSpec section.
        text += f"\n\nFunction NatSpec:\n {match['natspec']}"
    return text, Reward(value=-0.20, reason="get_similar_rule cost")
122
+
123
+
124
def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Grade the submitted property and end the episode.

    Only one submission is accepted per episode; a second attempt costs
    -1.0. A missing, empty, whitespace-only or non-string ``property`` value
    costs -0.5 and does not consume the attempt. Non-string values (e.g. a
    JSON ``null``) previously raised ``AttributeError`` on ``.strip()``.

    Args:
        ctx: Environment object exposing ``_submitted``, ``_done`` and
            ``_grader`` (with ``grade``, ``reward_for_score``, ``breakdown``).
        qkey: Query key (not read by this handler).
        params: Action parameters; the text is read from ``params["property"]``.

    Returns:
        ``(text, reward)``: a score summary with the matched key and bonus
        concepts, and the reward computed by the grader.
    """
    if ctx._submitted:
        return (
            "❌ You have already submitted a property for this episode. "
            "Only one submission is allowed.",
            Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
        )

    raw_text = params.get("property", "")
    # Anything that is not a string is treated as an empty submission.
    submitted_text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not submitted_text:
        return (
            "Submit requires 'property' key in params with a non-empty string.",
            Reward(value=-0.5, reason="Empty property submission"),
        )

    # Mark the episode finished before grading.
    ctx._submitted = True
    ctx._done = True

    score = ctx._grader.grade(submitted_text)
    reward = ctx._grader.reward_for_score(score)
    bd = ctx._grader.breakdown(submitted_text)

    pct = int(score * 100)
    if score >= 0.85:
        emoji, label = "✅", "EXCELLENT"
    elif score >= 0.60:
        emoji, label = "🟡", "GOOD"
    elif score >= 0.35:
        emoji, label = "🟠", "PARTIAL"
    else:
        emoji, label = "❌", "POOR"

    msg = (
        f"{emoji} {label} — Score: {score:.2f}/1.00 → Reward: {reward:.2f}/5.00 ({pct}%)\n"
        f"Key concepts matched : {len(bd['key_matched'])}/{len(bd['key_matched'])+len(bd['key_missed'])} "
        f"{bd['key_matched']}\n"
        f"Bonus concepts matched : {len(bd['bonus_matched'])}/{len(bd['bonus_matched'])+len(bd['bonus_missed'])} "
        f"{bd['bonus_matched']}"
    )
    return msg, Reward(
        value=reward,
        reason=f"Property submission score={score:.3f}",
        partial=False,
    )
168
+
169
+
170
def unknown_action(ctx: Any, qkey: str, params: Dict, action_type: str) -> Tuple[str, Reward]:
    """Report an unrecognised action and charge a -0.10 penalty.

    The message lists the value of every member of ``ActionType``.
    """
    valid_values = [member.value for member in ActionType]
    message = f"Unknown action type: '{action_type}'. Valid: {valid_values}"
    return message, Reward(value=-0.10, reason="Unknown action")
tasks/task2/environment.py CHANGED
@@ -27,14 +27,9 @@ from __future__ import annotations
27
 
28
  import random
29
  from typing import Any, Dict, List, Optional, Set
 
30
 
31
- from data.data_loader import (
32
- load_contracts,
33
- sample_property_episode,
34
- get_function_by_name,
35
- get_related_functions,
36
- get_similar_rule,
37
- )
38
  from env.base_env import BaseEnv
39
  from env.schemas import (
40
  Action,
@@ -107,7 +102,7 @@ class Task2Environment(BaseEnv):
107
  f"Function : {self._target_fn['name']} "
108
  f"({self._target_fn.get('signature', '')})\n"
109
  f"Your task : Discover the natural-language property of "
110
- f"'{self._target_fn['name']}' and submit it with submit_property."
111
  ),
112
  )
113
  return ResetResult(observation=obs, info={"task_id": TASK_ID})
@@ -182,159 +177,24 @@ class Task2Environment(BaseEnv):
182
  return False
183
 
184
  def _dispatch(self, action: Action) -> tuple[str, Reward]:
185
- at = action.action_type
186
  params = action.params
187
- qkey = self._qkey(at, params)
188
- fn = self._target_fn
189
- name = fn["name"]
190
-
191
- # ── get_function_code ────────────────────────────────────────────────
192
- if at == ActionType.GET_FUNCTION_CODE:
193
- if self._is_repeated(qkey):
194
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
195
- code = fn.get("code", "// no code available")
196
- return (
197
- f"// {name}\n{code}",
198
- Reward(value=-0.06, reason="get_function_code cost"),
199
- )
200
-
201
- # ── get_function_natspec ─────────────────────────────────────────────
202
- if at == ActionType.GET_FUNCTION_NATSPEC:
203
- if self._is_repeated(qkey):
204
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
205
- natspec = fn.get("natspec") or fn.get("comment") or "No NatSpec available."
206
- # Also include output_property if present
207
- out_prop = fn.get("output_property", "")
208
- result = f"NatSpec for '{name}':\n{natspec}"
209
- if out_prop:
210
- result += f"\n\nExpected output: {out_prop}"
211
- return result, Reward(value=-0.08, reason="get_function_natspec cost")
212
-
213
- # ── get_file_natspec ─────────────────────────────────────────────────
214
- if at == ActionType.GET_FILE_NATSPEC:
215
- if self._is_repeated(qkey):
216
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
217
- meta = self._contract.get("metadata", {})
218
- natspec = meta.get("natspec") or meta.get("description", "No file NatSpec available.")
219
- return (
220
- f"File NatSpec for {self._contract['contract_name']}:\n{natspec}",
221
- Reward(value=-0.03, reason="get_file_natspec cost"),
222
- )
223
-
224
- # ── get_related_functions ────────────────────────────────────────────
225
- if at == ActionType.GET_RELATED_FUNCTIONS:
226
- if self._is_repeated(qkey):
227
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
228
- related = get_related_functions(self._contract, name)
229
- if not related:
230
- text = f"No related functions found for '{name}'."
231
- else:
232
- summaries = []
233
- for rn in related:
234
- rfn = get_function_by_name(self._contract, rn)
235
- if rfn:
236
- sig = rfn.get("signature", rn)
237
- comment = rfn.get("comment", "")
238
- summaries.append(f" • {sig} — {comment}")
239
- text = f"Related functions for '{name}':\n" + "\n".join(summaries)
240
- return text, Reward(value=-0.06, reason="get_related_functions cost")
241
-
242
- # ── get_io ───────────────────────────────────────────────────────────
243
- if at == ActionType.GET_IO:
244
- if self._is_repeated(qkey):
245
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
246
- params_list = fn.get("parameters", [])
247
- returns = fn.get("returns", "") or "void"
248
- out_prop = fn.get("output_property", "")
249
- visibility = fn.get("visibility", "")
250
- modifiers = fn.get("modifiers", [])
251
-
252
- lines = [f"Function: {fn.get('signature', name)}"]
253
- lines.append(f"Visibility: {visibility}" + (f" Modifiers: {', '.join(modifiers)}" if modifiers else ""))
254
- if params_list:
255
- lines.append("Parameters:")
256
- for p in params_list:
257
- lines.append(f" • {p['type']} {p['name']}: {p.get('description','')}")
258
- else:
259
- lines.append("Parameters: none (payable)" if "payable" in fn.get("code","") else "Parameters: none")
260
- lines.append(f"Returns: {returns}")
261
- if out_prop:
262
- lines.append(f"Expected behaviour: {out_prop}")
263
- return "\n".join(lines), Reward(value=-0.04, reason="get_io cost")
264
-
265
- # ── get_similar_rule ─────────────────────────────────────────────────
266
- if at == ActionType.GET_SIMILAR_RULE:
267
- if self._is_repeated(qkey):
268
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
269
- sr = get_similar_rule(
270
- self._contracts,
271
- self._contract["contract_name"],
272
- name,
273
- )
274
- if sr is None:
275
- return (
276
- "No similar rule available for this function.",
277
- Reward(value=-0.20, reason="get_similar_rule cost (not found)"),
278
- )
279
- lines = [
280
- f"Similar property from {sr['contract_name']}.{sr['function_name']}():",
281
- f" {sr['property_hint']}",
282
- ]
283
- if sr.get("natspec"):
284
- lines.append(f"\nFunction NatSpec:\n {sr['natspec']}")
285
- return "\n".join(lines), Reward(value=-0.20, reason="get_similar_rule cost")
286
-
287
- # ── submit_property ──────────────────────────────────────────────────
288
- if at == ActionType.SUBMIT_PROPERTY:
289
- if self._submitted:
290
- return (
291
- "❌ You have already submitted a property for this episode. "
292
- "Only one submission is allowed.",
293
- Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
294
- )
295
- submitted_text = params.get("property", "").strip()
296
- if not submitted_text:
297
- return (
298
- "Submit requires 'property' key in params with a non-empty string.",
299
- Reward(value=-0.5, reason="Empty property submission"),
300
- )
301
-
302
- self._submitted = True
303
- self._done = True
304
-
305
- score = self._grader.grade(submitted_text)
306
- reward = self._grader.reward_for_score(score)
307
- bd = self._grader.breakdown(submitted_text)
308
-
309
- pct = int(score * 100)
310
- if score >= 0.85:
311
- emoji = "✅"
312
- label = "EXCELLENT"
313
- elif score >= 0.60:
314
- emoji = "🟡"
315
- label = "GOOD"
316
- elif score >= 0.35:
317
- emoji = "🟠"
318
- label = "PARTIAL"
319
- else:
320
- emoji = "❌"
321
- label = "POOR"
322
-
323
- msg = (
324
- f"{emoji} {label} — Score: {score:.2f}/1.00 → Reward: {reward:.2f}/5.00 ({pct}%)\n"
325
- f"Key concepts matched : {len(bd['key_matched'])}/{len(bd['key_matched'])+len(bd['key_missed'])} "
326
- f"{bd['key_matched']}\n"
327
- f"Bonus concepts matched : {len(bd['bonus_matched'])}/{len(bd['bonus_matched'])+len(bd['bonus_missed'])} "
328
- f"{bd['bonus_matched']}"
329
- )
330
- return msg, Reward(
331
- value=reward,
332
- reason=f"Property submission score={score:.3f}",
333
- partial=False,
334
- )
335
-
336
- # ── unknown action ────────────────────────────────────────────────────
337
- return (
338
- f"Unknown action type: '{at}'. Valid: {[a.value for a in AVAILABLE_ACTIONS]}",
339
- Reward(value=-0.10, reason="Unknown action"),
340
- )
 
27
 
28
  import random
29
  from typing import Any, Dict, List, Optional, Set
30
+ import actions
31
 
32
+ from data.data_loader import load_contracts, sample_property_episode
 
 
 
 
 
 
33
  from env.base_env import BaseEnv
34
  from env.schemas import (
35
  Action,
 
102
  f"Function : {self._target_fn['name']} "
103
  f"({self._target_fn.get('signature', '')})\n"
104
  f"Your task : Discover the natural-language property of "
105
+ f"'{self._target_fn['name']}' and submit it with submit_property action."
106
  ),
107
  )
108
  return ResetResult(observation=obs, info={"task_id": TASK_ID})
 
177
  return False
178
 
179
  def _dispatch(self, action: Action) -> tuple[str, Reward]:
180
+ at = action.action_type
181
  params = action.params
182
+ qkey = self._qkey(at, params)
183
+
184
+ # Mapping from ActionType to handler function
185
+ # Each handler expects (ctx, qkey, params) and returns (str, Reward)
186
+ handlers = {
187
+ ActionType.GET_FUNCTION_CODE: actions.get_function_code,
188
+ ActionType.GET_FUNCTION_NATSPEC: actions.get_function_natspec,
189
+ ActionType.GET_FILE_NATSPEC: actions.get_file_natspec,
190
+ ActionType.GET_RELATED_FUNCTIONS: actions.get_related_functions_action,
191
+ ActionType.GET_IO: actions.get_io,
192
+ ActionType.GET_SIMILAR_RULE: actions.get_similar_rule_action,
193
+ ActionType.SUBMIT_PROPERTY: actions.submit_property,
194
+ }
195
+
196
+ handler = handlers.get(at)
197
+ if handler is None:
198
+ return actions.unknown_action(self, qkey, params, at)
199
+
200
+ return handler(self, qkey, params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tasks/task3/actions.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task 3: Identify the function that violates a specified property."""
2
+
3
+ from typing import Any, Dict, Tuple
4
+ from data.data_loader import (
5
+ get_function_by_name,
6
+ list_function_names,
7
+ list_state_variable_names,
8
+ get_state_variable_by_name
9
+ )
10
+ from env.schemas import Reward, ActionType
11
+
12
def list_functions(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the comma-separated function names of the current contract.

    Returns:
        ``(text, reward)`` with a -0.05 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    contract = ctx._contract
    joined = ", ".join(list_function_names(contract))
    text = f"Functions in {contract['contract_name']}: {joined}"
    return text, Reward(value=-0.05, reason="list_functions cost")
21
+
22
+
23
def get_function_metadata(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return signature, visibility, modifiers, parameters, return type and
    summary of the function named in ``params["function_name"]``.

    Returns:
        ``(text, reward)`` with a -0.05 cost for both a found and an unknown
        function (the latter lists the available names), or a -0.40 penalty
        when ``ctx._is_repeated(qkey)`` is true.
    """
    fn_name = params.get("function_name", "")
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    entry = get_function_by_name(ctx._contract, fn_name)
    if entry is None:
        available = list_function_names(ctx._contract)
        return (
            f"Function '{fn_name}' not found. Available: {available}",
            Reward(value=-0.05, reason="Unknown function"),
        )

    arguments = entry.get("parameters", [])
    mods = entry.get("modifiers", [])
    mods_text = ", ".join(mods) if mods else "none"

    report = [
        f"Function : {entry.get('signature', fn_name)}",
        f"Visibility : {entry.get('visibility', 'unknown')}",
        f"Modifiers : {mods_text}",
    ]
    if not arguments:
        report.append("Parameters : none")
    else:
        report.append("Parameters :")
        report.extend(
            f" {arg['type']} {arg['name']} — {arg.get('description','')}"
            for arg in arguments
        )
    report.append(f"Returns : {entry.get('returns','') or 'void'}")
    report.append(f"Summary : {entry.get('comment','')}")

    return "\n".join(report), Reward(value=-0.05, reason="get_function_metadata cost")
51
+
52
+
53
def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the code of the function named in ``params["function_name"]``.

    Returns:
        ``(text, reward)`` with a -0.10 cost for both a found and an unknown
        function (the latter lists the available names), or a -0.40 penalty
        when ``ctx._is_repeated(qkey)`` is true.
    """
    fn_name = params.get("function_name", "")
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    entry = get_function_by_name(ctx._contract, fn_name)
    if entry is not None:
        source = entry.get("code", "// no code available")
        return (
            f"// {fn_name}\n{source}",
            Reward(value=-0.10, reason="get_function_code cost"),
        )

    available = list_function_names(ctx._contract)
    return (
        f"Function '{fn_name}' not found. Available: {available}",
        Reward(value=-0.10, reason="Unknown function — extra penalty"),
    )
70
+
71
+
72
def get_state_variable(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Describe one state variable, or list all names when none is given.

    With an empty ``params["variable_name"]`` the handler returns the names
    from ``list_state_variable_names``; otherwise it looks the variable up
    with ``get_state_variable_by_name``.

    Returns:
        ``(text, reward)`` with a -0.05 cost on every non-repeated path, or a
        -0.40 penalty when ``ctx._is_repeated(qkey)`` is true.
    """
    var_name = params.get("variable_name", "")
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    if var_name:
        sv = get_state_variable_by_name(ctx._contract, var_name)
        if sv is not None:
            return (
                f"{sv['type']} {sv['visibility']} {sv['name']}: {sv.get('description','')}",
                Reward(value=-0.05, reason="get_state_variable cost"),
            )
        return (
            f"Variable '{var_name}' not found.",
            Reward(value=-0.05, reason="Unknown state variable"),
        )

    # No name supplied: list what is available.
    all_names = list_state_variable_names(ctx._contract)
    return (
        f"State variables: {', '.join(all_names)}",
        Reward(value=-0.05, reason="Listed state variables"),
    )
93
+
94
+
95
def get_call_graph(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the contract's ``call_graph`` mapping as one line of text.

    Each entry is rendered as ``caller → [callee, ...]``; entries are joined
    with ``"; "``. A missing ``call_graph`` is treated as an empty mapping.

    Returns:
        ``(text, reward)`` with a -0.08 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    graph = ctx._contract.get("call_graph", {})
    edges = [
        f"{caller} → [{', '.join(callees)}]"
        for caller, callees in graph.items()
    ]
    rendered = "; ".join(edges)
    return f"Call graph: {rendered}", Reward(value=-0.08, reason="get_call_graph cost")
107
+
108
+ # TODO: This needs to change; property_formal doesn't exist
109
def get_formalized_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Return the ``property_formal`` text under the target function's
    ``task3`` entry, or a placeholder when it is missing or empty.

    Returns:
        ``(text, reward)`` with a -0.03 cost, or a -0.40 penalty when
        ``ctx._is_repeated(qkey)`` is true.
    """
    if ctx._is_repeated(qkey):
        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")

    task3_data = ctx._target_fn.get("task3", {})
    spec = task3_data.get("property_formal", "")
    if not spec:
        spec = "No formal specification available for this property."

    text = f"Formal property:\n{spec}"
    return text, Reward(value=-0.03, reason="get_formalized_property cost")
120
+
121
+
122
def submit_function(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
    """Grade the submitted function name and end the episode.

    Only one submission is accepted per episode; a second attempt costs
    -1.0. A missing, empty, whitespace-only or non-string ``function_name``
    costs -0.5 and does not consume the attempt. Non-string values (e.g. a
    JSON ``null``) previously raised ``AttributeError`` on ``.strip()``.

    Args:
        ctx: Environment object exposing ``_submitted``, ``_done`` and
            ``_grader`` (with ``grade_and_reward``, ``get_canonical_answer``).
        qkey: Query key (not read by this handler).
        params: Action parameters; the name is read from
            ``params["function_name"]``.

    Returns:
        ``(text, reward)``: a CORRECT / PARTIAL / INCORRECT message chosen by
        the grader's score (>= 0.9, >= 0.2, otherwise) and the grader's
        reward. The non-correct messages reveal the canonical target function.
    """
    if ctx._submitted:
        return (
            "❌ You have already submitted for this episode. "
            "Only ONE submission is allowed.",
            Reward(value=-1.0, reason="Second submit_function attempt", partial=False),
        )

    raw_name = params.get("function_name", "")
    # Anything that is not a string is treated as a malformed submission.
    fn_name = raw_name.strip() if isinstance(raw_name, str) else ""
    if not fn_name:
        return (
            "submit_function requires 'function_name' in params.",
            Reward(value=-0.5, reason="Malformed submission"),
        )

    # Mark the episode finished before grading.
    ctx._submitted = True
    ctx._done = True
    score, reward_val = ctx._grader.grade_and_reward(fn_name)
    correct = ctx._grader.get_canonical_answer()

    if score >= 0.9:
        msg = (
            f"✅ CORRECT! '{fn_name}' is the function that violates the property. "
            f"Score: 1.0 → Reward: +{reward_val:.1f}"
        )
    elif score >= 0.2:
        msg = (
            f"🟡 PARTIAL. '{fn_name}' is a subfunction of the target — "
            f"closely related but not the primary rule-breaker. "
            f"Score: 0.3 → Reward: +{reward_val:.1f}. "
            f"Correct answer: '{correct['target_function']}'."
        )
    else:
        msg = (
            f"❌ INCORRECT. '{fn_name}' does not violate the property. "
            f"Score: 0.0 → Reward: {reward_val:.1f}. "
            f"Correct answer: '{correct['target_function']}'."
        )

    return msg, Reward(
        value=reward_val,
        reason=f"submit_function score={score:.1f}",
        partial=False,
    )
166
+
167
+
168
def unknown_action(ctx: Any, qkey: str, params: Dict, action_type: str) -> Tuple[str, Reward]:
    """Report an unrecognised action and charge a -0.10 penalty.

    The message lists the value of every member of ``ActionType``.
    """
    valid_values = [member.value for member in ActionType]
    message = f"Unknown action '{action_type}'. Valid: {valid_values}"
    return message, Reward(value=-0.10, reason="Unknown action")
tasks/task3/environment.py CHANGED
@@ -35,15 +35,9 @@ from __future__ import annotations
35
 
36
  import random
37
  from typing import Any, Dict, List, Optional, Set
 
38
 
39
- from data.data_loader import (
40
- load_contracts,
41
- sample_task3_episode,
42
- get_function_by_name,
43
- get_state_variable_by_name,
44
- list_function_names,
45
- list_state_variable_names,
46
- )
47
  from env.base_env import BaseEnv
48
  from env.schemas import (
49
  Action,
@@ -188,163 +182,25 @@ class Task3Environment(BaseEnv):
188
  return True
189
  self._seen.add(key)
190
  return False
191
-
192
  def _dispatch(self, action: Action) -> tuple[str, Reward]:
193
- at = action.action_type
194
  params = action.params
195
- qkey = self._qkey(at, params)
196
-
197
- # ── list_functions ────────────────────────────────────────────────────
198
- if at == ActionType.LIST_FUNCTIONS:
199
- if self._is_repeated(qkey):
200
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
201
- names = list_function_names(self._contract)
202
- return (
203
- f"Functions in {self._contract['contract_name']}: {', '.join(names)}",
204
- Reward(value=-0.05, reason="list_functions cost"),
205
- )
206
-
207
- # ── get_function_metadata ─────────────────────────────────────────────
208
- if at == ActionType.GET_FUNCTION_METADATA:
209
- fn_name = params.get("function_name", "")
210
- if self._is_repeated(qkey):
211
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
212
- fn = get_function_by_name(self._contract, fn_name)
213
- if fn is None:
214
- return (
215
- f"Function '{fn_name}' not found. "
216
- f"Available: {list_function_names(self._contract)}",
217
- Reward(value=-0.05, reason="Unknown function"),
218
- )
219
- params_list = fn.get("parameters", [])
220
- modifiers = fn.get("modifiers", [])
221
- lines = [
222
- f"Function : {fn.get('signature', fn_name)}",
223
- f"Visibility : {fn.get('visibility', 'unknown')}",
224
- f"Modifiers : {', '.join(modifiers) if modifiers else 'none'}",
225
- ]
226
- if params_list:
227
- lines.append("Parameters :")
228
- for p in params_list:
229
- lines.append(f" {p['type']} {p['name']} — {p.get('description','')}")
230
- else:
231
- lines.append("Parameters : none")
232
- lines.append(f"Returns : {fn.get('returns','') or 'void'}")
233
- lines.append(f"Summary : {fn.get('comment','')}")
234
- return "\n".join(lines), Reward(value=-0.05, reason="get_function_metadata cost")
235
-
236
- # ── get_function_code ─────────────────────────────────────────────────
237
- if at == ActionType.GET_FUNCTION_CODE:
238
- fn_name = params.get("function_name", "")
239
- if self._is_repeated(qkey):
240
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
241
- fn = get_function_by_name(self._contract, fn_name)
242
- if fn is None:
243
- return (
244
- f"Function '{fn_name}' not found. "
245
- f"Available: {list_function_names(self._contract)}",
246
- Reward(value=-0.10, reason="Unknown function — extra penalty"),
247
- )
248
- code = fn.get("code", "// no code available")
249
- return (
250
- f"// {fn_name}\n{code}",
251
- Reward(value=-0.10, reason="get_function_code cost"),
252
- )
253
-
254
- # ── get_state_variables ───────────────────────────────────────────────
255
- if at == ActionType.GET_STATE_VARIABLE:
256
- var_name = params.get("variable_name", "")
257
- if self._is_repeated(qkey):
258
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
259
- if not var_name:
260
- names = list_state_variable_names(self._contract)
261
- return (
262
- f"State variables: {', '.join(names)}",
263
- Reward(value=-0.05, reason="Listed state variables"),
264
- )
265
- sv = get_state_variable_by_name(self._contract, var_name)
266
- if sv is None:
267
- return (
268
- f"Variable '{var_name}' not found.",
269
- Reward(value=-0.05, reason="Unknown state variable"),
270
- )
271
- return (
272
- f"{sv['type']} {sv['visibility']} {sv['name']}: {sv.get('description','')}",
273
- Reward(value=-0.05, reason="get_state_variable cost"),
274
- )
275
-
276
- # ── get_call_graph ────────────────────────────────────────────────────
277
- if at == ActionType.GET_CALL_GRAPH:
278
- if self._is_repeated(qkey):
279
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
280
- cg = self._contract.get("call_graph", {})
281
- cg_str = "; ".join(
282
- f"{fn} → [{', '.join(callees)}]" for fn, callees in cg.items()
283
- )
284
- return (
285
- f"Call graph: {cg_str}",
286
- Reward(value=-0.08, reason="get_call_graph cost"),
287
- )
288
-
289
- # ── get_formalized_property ───────────────────────────────────────────
290
- if at == ActionType.GET_FORMALIZED_PROPERTY:
291
- if self._is_repeated(qkey):
292
- return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
293
- formal = self._target_fn.get("task3", {}).get("property_formal", "")
294
- if not formal:
295
- formal = "No formal specification available for this property."
296
- return (
297
- f"Formal property:\n{formal}",
298
- Reward(value=-0.03, reason="get_formalized_property cost"),
299
- )
300
-
301
- # ── submit_function ───────────────────────────────────────────────────
302
- if at == ActionType.SUBMIT_FUNCTION:
303
- if self._submitted:
304
- return (
305
- "❌ You have already submitted for this episode. "
306
- "Only ONE submission is allowed.",
307
- Reward(value=-1.0, reason="Second submit_function attempt", partial=False),
308
- )
309
- fn_name = params.get("function_name", "").strip()
310
- if not fn_name:
311
- return (
312
- "submit_function requires 'function_name' in params.",
313
- Reward(value=-0.5, reason="Malformed submission"),
314
- )
315
-
316
- self._submitted = True
317
- self._done = True
318
- score, reward_val = self._grader.grade_and_reward(fn_name)
319
- correct = self._grader.get_canonical_answer()
320
-
321
- if score >= 0.9:
322
- msg = (
323
- f"✅ CORRECT! '{fn_name}' is the function that violates the property. "
324
- f"Score: 1.0 → Reward: +{reward_val:.1f}"
325
- )
326
- elif score >= 0.2:
327
- msg = (
328
- f"🟡 PARTIAL. '{fn_name}' is a subfunction of the target — "
329
- f"closely related but not the primary rule-breaker. "
330
- f"Score: 0.3 → Reward: +{reward_val:.1f}. "
331
- f"Correct answer: '{correct['target_function']}'."
332
- )
333
- else:
334
- msg = (
335
- f"❌ INCORRECT. '{fn_name}' does not violate the property. "
336
- f"Score: 0.0 → Reward: {reward_val:.1f}. "
337
- f"Correct answer: '{correct['target_function']}'."
338
- )
339
-
340
- return msg, Reward(
341
- value=reward_val,
342
- reason=f"submit_function score={score:.1f}",
343
- partial=False,
344
- )
345
-
346
- # ── unknown action ────────────────────────────────────────────────────
347
- return (
348
- f"Unknown action '{at}'. Valid: {[a.value for a in AVAILABLE_ACTIONS]}",
349
- Reward(value=-0.10, reason="Unknown action"),
350
- )
 
35
 
36
  import random
37
  from typing import Any, Dict, List, Optional, Set
38
+ import actions
39
 
40
+ from data.data_loader import load_contracts, sample_task3_episode
 
 
 
 
 
 
 
41
  from env.base_env import BaseEnv
42
  from env.schemas import (
43
  Action,
 
182
  return True
183
  self._seen.add(key)
184
  return False
185
+
186
  def _dispatch(self, action: Action) -> tuple[str, Reward]:
187
+ at = action.action_type
188
  params = action.params
189
+ qkey = self._qkey(at, params)
190
+
191
+ # Mapping from ActionType to handler function
192
+ handlers = {
193
+ ActionType.LIST_FUNCTIONS: actions.list_functions,
194
+ ActionType.GET_FUNCTION_METADATA: actions.get_function_metadata,
195
+ ActionType.GET_FUNCTION_CODE: actions.get_function_code,
196
+ ActionType.GET_STATE_VARIABLE: actions.get_state_variable,
197
+ ActionType.GET_CALL_GRAPH: actions.get_call_graph,
198
+ ActionType.GET_FORMALIZED_PROPERTY: actions.get_formalized_property,
199
+ ActionType.SUBMIT_FUNCTION: actions.submit_function,
200
+ }
201
+
202
+ handler = handlers.get(at)
203
+ if handler is None:
204
+ return actions.unknown_action(self, qkey, params, at)
205
+
206
+ return handler(self, qkey, params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/prompts.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the Task 1 agent: find the single vulnerable function in a
# contract and name its vulnerability type. The prompt lists the JSON actions
# the agent may emit (one per turn) and a suggested inspection strategy.
T1_SYSTEM = """You are an expert Solidity smart contract security auditor.

Given a contract, identify the ONE vulnerable function and its vulnerability type.

## Actions (choose ONE per turn, respond with JSON only):
{"action": "list_functions", "params": {}}
{"action": "get_function_code", "params": {"function_name": "<name>"}}
{"action": "get_function_summary", "params": {"function_name": "<name>"}}
{"action": "get_file_metadata", "params": {}}
{"action": "get_state_variable", "params": {"variable_name": "<name>"}}
{"action": "get_call_graph", "params": {}}
{"action": "submit", "params": {"function_name": "<name>", "vulnerability_type": "<2-3 words>"}}

## Strategy:
1. list_functions first to see the attack surface
2. Inspect suspicious functions (withdraw, drain, buy, stake, claim, setPrice, bid, finalize)
3. Look for: reentrancy, missing access control, integer overflow, tx.origin, front-running,
timestamp dependence, denial of service, unchecked return value
4. Submit when confident

Respond ONLY with valid JSON. No explanation, no markdown."""
22
+
23
# System prompt for the Task 2 agent: write a natural-language property
# (invariant / postcondition) for one given Solidity function.
#
# The action list below must stay contiguous. An earlier revision had Python
# source (`_t3_user_msg` / `run_t3_episode`) pasted into the middle of this
# string literal, which was then sent to the model as part of the prompt.
T2_SYSTEM = """You are a formal methods engineer specialising in Solidity smart contracts.

You will be shown a specific Solidity function. Your task is to write a precise
natural-language property (invariant / postcondition) that describes what the
function guarantees when it succeeds.

A good property covers:
- What state changes (balances, counters, flags)
- What assets are transferred (ETH, tokens, NFTs)
- What return value is produced (for view functions)
- Under what conditions it reverts

## Actions (respond with JSON only, ONE action per turn):
{"action": "get_function_code", "params": {}}
{"action": "get_function_natspec", "params": {}}
{"action": "get_file_natspec", "params": {}}
{"action": "get_related_functions", "params": {}}
{"action": "get_io", "params": {}}
{"action": "get_similar_rule", "params": {}}
{"action": "submit_property", "params": {"property": "<your full property text>"}}

## Rules:
- You have ONE submit_property attempt. Make it count.
- Use get_function_natspec and get_io first — they give the most signal.
- get_similar_rule costs more (-0.20) but shows a parallel property from another contract.
- Write 2–4 sentences. Be specific about variable names and amounts.
- Do NOT guess — read the code first.

Respond ONLY with valid JSON. No markdown, no explanation."""
77
+
78
+
79
# System prompt for the Task 3 agent: given a contract and an English
# property, find the single function that violates it. The prompt lists the
# JSON actions the agent may emit (one per turn), a suggested strategy, and
# typical code clues; it tells the agent only one submit_function is allowed.
T3_SYSTEM = """You are a smart contract security auditor checking rule compliance.

You are given a Solidity contract and a property (rule) in natural English.
Your task is to find the ONE function that violates this property.

## Actions (respond with JSON only, ONE action per turn):
{"action": "list_functions", "params": {}}
{"action": "get_formalized_property", "params": {}}
{"action": "get_function_metadata", "params": {"function_name": "<n>"}}
{"action": "get_function_code", "params": {"function_name": "<n>"}}
{"action": "get_state_variable", "params": {"variable_name": "<n>"}}
{"action": "get_call_graph", "params": {}}
{"action": "submit_function", "params": {"function_name": "<n>"}}

## Strategy:
1. Read the property shown as property_english in the observation.
2. list_functions to survey candidates.
3. get_formalized_property for the precise pre/post-condition (cheap: -0.03).
4. get_function_code on the 1-2 most suspicious functions.
5. submit_function when confident — ONE attempt only.

Clues: missing require, no access modifier, unchecked external call, unbounded array,
tx.origin auth, integer overflow, timestamp manipulation, reentrancy ordering.

Respond ONLY with valid JSON. No markdown, no explanation."""