Spaces:

Codex47
/

SmartContractAudit

Running

App Files Files Community

ajaxwin committed 15 days ago

Commit

e8c9acc

1 Parent(s): 49df6d3

separated actions, prompts

Browse files

Files changed (7) hide show

data/data_loader.py +3 -1
inference.py +1 -79
tasks/task2/actions.py +175 -0
tasks/task2/environment.py +23 -163
tasks/task3/actions.py +173 -0
tasks/task3/environment.py +22 -166
utils/prompts.py +103 -0

data/data_loader.py CHANGED Viewed

@@ -156,6 +156,8 @@ def get_related_functions(
     return sorted(related)
 def get_similar_rule(
     contracts: List[Dict[str, Any]],
     current_contract_name: str,
@@ -172,7 +174,7 @@ def get_similar_rule(
     for contract in contracts:
         if contract["contract_name"] == current_contract_name:
             fn = get_function_by_name(contract, current_function_name)
-            if fn and fn.get("property") and fn["property"].get("similar_rule"):
                 sr = fn["property"]["similar_rule"]
                 # Look up the referenced function's natspec
                 for c2 in contracts:

     return sorted(related)
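+# ! FIXME: get_similar_rule is broken — it looks up fn["property"]["similar_rule"], but functions have no "property" or "similar_rule" field.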
 def get_similar_rule(
     contracts: List[Dict[str, Any]],
     current_contract_name: str,
     for contract in contracts:
         if contract["contract_name"] == current_contract_name:
             fn = get_function_by_name(contract, current_function_name)
+            if fn and fn.get("property") and fn["property"].get("similar_rule"):            # ! There is no property or similar_rule field
                 sr = fn["property"]["similar_rule"]
                 # Look up the referenced function's natspec
                 for c2 in contracts:

inference.py CHANGED Viewed

@@ -32,6 +32,7 @@ from tasks.task1.environment import Task1Environment
 from tasks.task2.environment import Task2Environment
 from tasks.task3.environment import Task3Environment
 from env.schemas import Action, ActionType
 # ─────────────────────────────────────────────────────────────────────────────
 # Configuration
@@ -56,28 +57,6 @@ client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)
 # Task 1 agent
 # ─────────────────────────────────────────────────────────────────────────────
-T1_SYSTEM = """You are an expert Solidity smart contract security auditor.
-Given a contract, identify the ONE vulnerable function and its vulnerability type.
-## Actions (choose ONE per turn, respond with JSON only):
-{"action": "list_functions",       "params": {}}
-{"action": "get_function_code",    "params": {"function_name": "<name>"}}
-{"action": "get_function_summary", "params": {"function_name": "<name>"}}
-{"action": "get_file_metadata",    "params": {}}
-{"action": "get_state_variable",   "params": {"variable_name": "<name>"}}
-{"action": "get_call_graph",       "params": {}}
-{"action": "submit",               "params": {"function_name": "<name>", "vulnerability_type": "<2-3 words>"}}
-## Strategy:
-1. list_functions first to see the attack surface
-2. Inspect suspicious functions (withdraw, drain, buy, stake, claim, setPrice, bid, finalize)
-3. Look for: reentrancy, missing access control, integer overflow, tx.origin, front-running,
-   timestamp dependence, denial of service, unchecked return value
-4. Submit when confident
-Respond ONLY with valid JSON. No explanation, no markdown."""
 def _t1_user_msg(obs: Dict[str, Any]) -> str:
     return (
@@ -138,36 +117,6 @@ def run_t1_episode(env: Task1Environment, seed: int, ep: int) -> Dict[str, Any]:
 # Task 2 agent
 # ─────────────────────────────────────────────────────────────────────────────
-T2_SYSTEM = """You are a formal methods engineer specialising in Solidity smart contracts.
-You will be shown a specific Solidity function. Your task is to write a precise
-natural-language property (invariant / postcondition) that describes what the
-function guarantees when it succeeds.
-A good property covers:
-  - What state changes (balances, counters, flags)
-  - What assets are transferred (ETH, tokens, NFTs)
-  - What return value is produced (for view functions)
-  - Under what conditions it reverts
-## Actions (respond with JSON only, ONE action per turn):
-{"action": "get_function_code",     "params": {}}
-{"action": "get_function_natspec",  "params": {}}
-{"action": "get_file_natspec",      "params": {}}
-{"action": "get_related_functions", "params": {}}
-{"action": "get_io",                "params": {}}
-{"action": "get_similar_rule",      "params": {}}
-{"action": "submit_property",       "params": {"property": "<your full property text>"}}
-## Rules:
-- You have ONE submit_property attempt. Make it count.
-- Use get_function_natspec and get_io first — they give the most signal.
-- get_similar_rule costs more (-0.20) but shows a parallel property from another contract.
-- Write 2–4 sentences. Be specific about variable names and amounts.
-- Do NOT guess — read the code first.
-Respond ONLY with valid JSON. No markdown, no explanation."""
 def _t2_user_msg(obs: Dict[str, Any]) -> str:
     extra = obs.get("extra", {})
@@ -262,33 +211,6 @@ def run_task2(n: int = NUM_EPISODES) -> Dict[str, Any]:
             "avg_grader_score": avg_s, "avg_cumulative_reward": avg_r}
-T3_SYSTEM = """You are a smart contract security auditor checking rule compliance.
-You are given a Solidity contract and a property (rule) in natural English.
-Your task is to find the ONE function that violates this property.
-## Actions (respond with JSON only, ONE action per turn):
-{"action": "list_functions",          "params": {}}
-{"action": "get_formalized_property", "params": {}}
-{"action": "get_function_metadata",   "params": {"function_name": "<n>"}}
-{"action": "get_function_code",       "params": {"function_name": "<n>"}}
-{"action": "get_state_variable",      "params": {"variable_name": "<n>"}}
-{"action": "get_call_graph",          "params": {}}
-{"action": "submit_function",         "params": {"function_name": "<n>"}}
-## Strategy:
-1. Read the property shown as property_english in the observation.
-2. list_functions to survey candidates.
-3. get_formalized_property for the precise pre/post-condition (cheap: -0.03).
-4. get_function_code on the 1-2 most suspicious functions.
-5. submit_function when confident — ONE attempt only.
-Clues: missing require, no access modifier, unchecked external call, unbounded array,
-tx.origin auth, integer overflow, timestamp manipulation, reentrancy ordering.
-Respond ONLY with valid JSON. No markdown, no explanation."""
 def _t3_user_msg(obs: Dict[str, Any]) -> str:
     extra = obs.get("extra", {})
     return (

 from tasks.task2.environment import Task2Environment
 from tasks.task3.environment import Task3Environment
 from env.schemas import Action, ActionType
+from utils.prompts import T1_SYSTEM, T2_SYSTEM, T3_SYSTEM
 # ─────────────────────────────────────────────────────────────────────────────
 # Configuration
 # Task 1 agent
 # ─────────────────────────────────────────────────────────────────────────────
 def _t1_user_msg(obs: Dict[str, Any]) -> str:
     return (
 # Task 2 agent
 # ─────────────────────────────────────────────────────────────────────────────
 def _t2_user_msg(obs: Dict[str, Any]) -> str:
     extra = obs.get("extra", {})
             "avg_grader_score": avg_s, "avg_cumulative_reward": avg_r}
 def _t3_user_msg(obs: Dict[str, Any]) -> str:
     extra = obs.get("extra", {})
     return (

tasks/task2/actions.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+Actions for Task 2: Property Inference.
+"""
+from typing import Any, Dict, Tuple
+from data.data_loader import get_function_by_name, get_related_functions, get_similar_rule
+from env.schemas import ActionType, Reward
+def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FUNCTION_CODE action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    fn = ctx._target_fn
+    name = fn["name"]
+    code = fn.get("code", "// no code available")
+    return (
+        f"// {name}\n{code}",
+        Reward(value=-0.06, reason="get_function_code cost"),
+    )
+def get_function_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FUNCTION_NATSPEC action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    fn = ctx._target_fn
+    name = fn["name"]
+    natspec = fn.get("natspec") or fn.get("comment") or "No NatSpec available."
+    out_prop = fn.get("output_property", "")
+    result = f"NatSpec for '{name}':\n{natspec}"
+    if out_prop:
+        result += f"\n\nExpected output: {out_prop}"
+    return result, Reward(value=-0.08, reason="get_function_natspec cost")
+def get_file_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FILE_NATSPEC action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    meta = ctx._contract.get("metadata", {})
+    natspec = meta.get("natspec") or meta.get("description", "No file NatSpec available.")
+    return (
+        f"File NatSpec for {ctx._contract['contract_name']}:\n{natspec}",
+        Reward(value=-0.03, reason="get_file_natspec cost"),
+    )
+def get_related_functions_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_RELATED_FUNCTIONS action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    name = ctx._target_fn["name"]
+    related = get_related_functions(ctx._contract, name)
+    if not related:
+        text = f"No related functions found for '{name}'."
+    else:
+        summaries = []
+        for rn in related:
+            rfn = get_function_by_name(ctx._contract, rn)
+            if rfn:
+                sig = rfn.get("signature", rn)
+                comment = rfn.get("comment", "")
+                summaries.append(f"  • {sig} — {comment}")
+        text = f"Related functions for '{name}':\n" + "\n".join(summaries)
+    return text, Reward(value=-0.06, reason="get_related_functions cost")
+def get_io(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_IO action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    fn = ctx._target_fn
+    name = fn["name"]
+    params_list = fn.get("parameters", [])
+    returns = fn.get("returns", "") or "void"
+    out_prop = fn.get("output_property", "")
+    visibility = fn.get("visibility", "")
+    modifiers = fn.get("modifiers", [])
+    lines = [f"Function: {fn.get('signature', name)}"]
+    lines.append(f"Visibility: {visibility}" + (f"  Modifiers: {', '.join(modifiers)}" if modifiers else ""))
+    if params_list:
+        lines.append("Parameters:")
+        for p in params_list:
+            lines.append(f"  • {p['type']} {p['name']}: {p.get('description','')}")
+    else:
+        lines.append("Parameters: none" + (" (payable)" if "payable" in fn.get("code", "") else ""))
+    lines.append(f"Returns: {returns}")
+    if out_prop:
+        lines.append(f"Expected behaviour: {out_prop}")
+    return "\n".join(lines), Reward(value=-0.04, reason="get_io cost")
+# ! FIXME: Broken handler — there is no similar_rule field in the dataset, so this function will always return "No similar rule available for this function."
+def get_similar_rule_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_SIMILAR_RULE action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    sr = get_similar_rule(
+        ctx._contracts,
+        ctx._contract["contract_name"],
+        ctx._target_fn["name"],
+    )
+    if sr is None:
+        return (
+            "No similar rule available for this function.",
+            Reward(value=-0.20, reason="get_similar_rule cost (not found)"),
+        )
+    lines = [
+        f"Similar property from {sr['contract_name']}.{sr['function_name']}():",
+        f"  {sr['property_hint']}",
+    ]
+    if sr.get("natspec"):
+        lines.append(f"\nFunction NatSpec:\n  {sr['natspec']}")
+    return "\n".join(lines), Reward(value=-0.20, reason="get_similar_rule cost")
+def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle SUBMIT_PROPERTY action."""
+    if ctx._submitted:
+        return (
+            "❌ You have already submitted a property for this episode. "
+            "Only one submission is allowed.",
+            Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
+        )
+    submitted_text = params.get("property", "").strip()
+    if not submitted_text:
+        return (
+            "Submit requires 'property' key in params with a non-empty string.",
+            Reward(value=-0.5, reason="Empty property submission"),
+        )
+    ctx._submitted = True
+    ctx._done = True
+    score = ctx._grader.grade(submitted_text)
+    reward = ctx._grader.reward_for_score(score)
+    bd = ctx._grader.breakdown(submitted_text)
+    pct = int(score * 100)
+    if score >= 0.85:
+        emoji, label = "✅", "EXCELLENT"
+    elif score >= 0.60:
+        emoji, label = "🟡", "GOOD"
+    elif score >= 0.35:
+        emoji, label = "🟠", "PARTIAL"
+    else:
+        emoji, label = "❌", "POOR"
+    msg = (
+        f"{emoji} {label} — Score: {score:.2f}/1.00 → Reward: {reward:.2f}/5.00  ({pct}%)\n"
+        f"Key concepts matched   : {len(bd['key_matched'])}/{len(bd['key_matched'])+len(bd['key_missed'])}  "
+        f"{bd['key_matched']}\n"
+        f"Bonus concepts matched : {len(bd['bonus_matched'])}/{len(bd['bonus_matched'])+len(bd['bonus_missed'])}  "
+        f"{bd['bonus_matched']}"
+    )
+    return msg, Reward(
+        value=reward,
+        reason=f"Property submission score={score:.3f}",
+        partial=False,
+    )
+def unknown_action(ctx: Any, qkey: str, params: Dict, action_type: str) -> Tuple[str, Reward]:
+    """Fallback for unknown actions."""
+    return (
+        f"Unknown action type: '{action_type}'. Valid: {[a.value for a in ActionType]}",
+        Reward(value=-0.10, reason="Unknown action"),
+    )

tasks/task2/environment.py CHANGED Viewed

@@ -27,14 +27,9 @@ from __future__ import annotations
 import random
 from typing import Any, Dict, List, Optional, Set
-from data.data_loader import (
-    load_contracts,
-    sample_property_episode,
-    get_function_by_name,
-    get_related_functions,
-    get_similar_rule,
-)
 from env.base_env import BaseEnv
 from env.schemas import (
     Action,
@@ -107,7 +102,7 @@ class Task2Environment(BaseEnv):
                 f"Function  : {self._target_fn['name']}  "
                 f"({self._target_fn.get('signature', '')})\n"
                 f"Your task : Discover the natural-language property of "
-                f"'{self._target_fn['name']}' and submit it with submit_property."
             ),
         )
         return ResetResult(observation=obs, info={"task_id": TASK_ID})
@@ -182,159 +177,24 @@ class Task2Environment(BaseEnv):
         return False
     def _dispatch(self, action: Action) -> tuple[str, Reward]:
-        at     = action.action_type
         params = action.params
-        qkey   = self._qkey(at, params)
-        fn     = self._target_fn
-        name   = fn["name"]
-        # ── get_function_code ────────────────────────────────────────────────
-        if at == ActionType.GET_FUNCTION_CODE:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            code = fn.get("code", "// no code available")
-            return (
-                f"// {name}\n{code}",
-                Reward(value=-0.06, reason="get_function_code cost"),
-            )
-        # ── get_function_natspec ─────────────────────────────────────────────
-        if at == ActionType.GET_FUNCTION_NATSPEC:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            natspec = fn.get("natspec") or fn.get("comment") or "No NatSpec available."
-            # Also include output_property if present
-            out_prop = fn.get("output_property", "")
-            result = f"NatSpec for '{name}':\n{natspec}"
-            if out_prop:
-                result += f"\n\nExpected output: {out_prop}"
-            return result, Reward(value=-0.08, reason="get_function_natspec cost")
-        # ── get_file_natspec ─────────────────────────────────────────────────
-        if at == ActionType.GET_FILE_NATSPEC:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            meta = self._contract.get("metadata", {})
-            natspec = meta.get("natspec") or meta.get("description", "No file NatSpec available.")
-            return (
-                f"File NatSpec for {self._contract['contract_name']}:\n{natspec}",
-                Reward(value=-0.03, reason="get_file_natspec cost"),
-            )
-        # ── get_related_functions ────────────────────────────────────────────
-        if at == ActionType.GET_RELATED_FUNCTIONS:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            related = get_related_functions(self._contract, name)
-            if not related:
-                text = f"No related functions found for '{name}'."
-            else:
-                summaries = []
-                for rn in related:
-                    rfn = get_function_by_name(self._contract, rn)
-                    if rfn:
-                        sig = rfn.get("signature", rn)
-                        comment = rfn.get("comment", "")
-                        summaries.append(f"  • {sig} — {comment}")
-                text = f"Related functions for '{name}':\n" + "\n".join(summaries)
-            return text, Reward(value=-0.06, reason="get_related_functions cost")
-        # ── get_io ───────────────────────────────────────────────────────────
-        if at == ActionType.GET_IO:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            params_list = fn.get("parameters", [])
-            returns     = fn.get("returns", "") or "void"
-            out_prop    = fn.get("output_property", "")
-            visibility  = fn.get("visibility", "")
-            modifiers   = fn.get("modifiers", [])
-            lines = [f"Function: {fn.get('signature', name)}"]
-            lines.append(f"Visibility: {visibility}" + (f"  Modifiers: {', '.join(modifiers)}" if modifiers else ""))
-            if params_list:
-                lines.append("Parameters:")
-                for p in params_list:
-                    lines.append(f"  • {p['type']} {p['name']}: {p.get('description','')}")
-            else:
-                lines.append("Parameters: none (payable)" if "payable" in fn.get("code","") else "Parameters: none")
-            lines.append(f"Returns: {returns}")
-            if out_prop:
-                lines.append(f"Expected behaviour: {out_prop}")
-            return "\n".join(lines), Reward(value=-0.04, reason="get_io cost")
-        # ── get_similar_rule ─────────────────────────────────────────────────
-        if at == ActionType.GET_SIMILAR_RULE:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            sr = get_similar_rule(
-                self._contracts,
-                self._contract["contract_name"],
-                name,
-            )
-            if sr is None:
-                return (
-                    "No similar rule available for this function.",
-                    Reward(value=-0.20, reason="get_similar_rule cost (not found)"),
-                )
-            lines = [
-                f"Similar property from {sr['contract_name']}.{sr['function_name']}():",
-                f"  {sr['property_hint']}",
-            ]
-            if sr.get("natspec"):
-                lines.append(f"\nFunction NatSpec:\n  {sr['natspec']}")
-            return "\n".join(lines), Reward(value=-0.20, reason="get_similar_rule cost")
-        # ── submit_property ──────────────────────────────────────────────────
-        if at == ActionType.SUBMIT_PROPERTY:
-            if self._submitted:
-                return (
-                    "❌ You have already submitted a property for this episode. "
-                    "Only one submission is allowed.",
-                    Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
-                )
-            submitted_text = params.get("property", "").strip()
-            if not submitted_text:
-                return (
-                    "Submit requires 'property' key in params with a non-empty string.",
-                    Reward(value=-0.5, reason="Empty property submission"),
-                )
-            self._submitted = True
-            self._done      = True
-            score  = self._grader.grade(submitted_text)
-            reward = self._grader.reward_for_score(score)
-            bd     = self._grader.breakdown(submitted_text)
-            pct = int(score * 100)
-            if score >= 0.85:
-                emoji = "✅"
-                label = "EXCELLENT"
-            elif score >= 0.60:
-                emoji = "🟡"
-                label = "GOOD"
-            elif score >= 0.35:
-                emoji = "🟠"
-                label = "PARTIAL"
-            else:
-                emoji = "❌"
-                label = "POOR"
-            msg = (
-                f"{emoji} {label} — Score: {score:.2f}/1.00 → Reward: {reward:.2f}/5.00  ({pct}%)\n"
-                f"Key concepts matched   : {len(bd['key_matched'])}/{len(bd['key_matched'])+len(bd['key_missed'])}  "
-                f"{bd['key_matched']}\n"
-                f"Bonus concepts matched : {len(bd['bonus_matched'])}/{len(bd['bonus_matched'])+len(bd['bonus_missed'])}  "
-                f"{bd['bonus_matched']}"
-            )
-            return msg, Reward(
-                value=reward,
-                reason=f"Property submission score={score:.3f}",
-                partial=False,
-            )
-        # ── unknown action ────────────────────────────────────────────────────
-        return (
-            f"Unknown action type: '{at}'. Valid: {[a.value for a in AVAILABLE_ACTIONS]}",
-            Reward(value=-0.10, reason="Unknown action"),
-        )

 import random
 from typing import Any, Dict, List, Optional, Set
+import actions
+from data.data_loader import load_contracts, sample_property_episode
 from env.base_env import BaseEnv
 from env.schemas import (
     Action,
                 f"Function  : {self._target_fn['name']}  "
                 f"({self._target_fn.get('signature', '')})\n"
                 f"Your task : Discover the natural-language property of "
+                f"'{self._target_fn['name']}' and submit it with submit_property action."
             ),
         )
         return ResetResult(observation=obs, info={"task_id": TASK_ID})
         return False
     def _dispatch(self, action: Action) -> tuple[str, Reward]:
+        at = action.action_type
         params = action.params
+        qkey = self._qkey(at, params)
+        # Mapping from ActionType to handler function
+        # Each handler expects (ctx, qkey, params) and returns (str, Reward)
+        handlers = {
+            ActionType.GET_FUNCTION_CODE:    actions.get_function_code,
+            ActionType.GET_FUNCTION_NATSPEC: actions.get_function_natspec,
+            ActionType.GET_FILE_NATSPEC:     actions.get_file_natspec,
+            ActionType.GET_RELATED_FUNCTIONS: actions.get_related_functions_action,
+            ActionType.GET_IO:               actions.get_io,
+            ActionType.GET_SIMILAR_RULE:     actions.get_similar_rule_action,
+            ActionType.SUBMIT_PROPERTY:      actions.submit_property,
+        }
+        handler = handlers.get(at)
+        if handler is None:
+            return actions.unknown_action(self, qkey, params, at)
+        return handler(self, qkey, params)

tasks/task3/actions.py ADDED Viewed

	@@ -0,0 +1,173 @@

+"""Task 3: Identify the function that violates a specified property."""
+from typing import Any, Dict, Tuple
+from data.data_loader import (
+    get_function_by_name,
+    list_function_names,
+    list_state_variable_names,
+    get_state_variable_by_name
+)
+from env.schemas import Reward, ActionType
+def list_functions(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle LIST_FUNCTIONS action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    names = list_function_names(ctx._contract)
+    return (
+        f"Functions in {ctx._contract['contract_name']}: {', '.join(names)}",
+        Reward(value=-0.05, reason="list_functions cost"),
+    )
+def get_function_metadata(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FUNCTION_METADATA action."""
+    fn_name = params.get("function_name", "")
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    fn = get_function_by_name(ctx._contract, fn_name)
+    if fn is None:
+        return (
+            f"Function '{fn_name}' not found. "
+            f"Available: {list_function_names(ctx._contract)}",
+            Reward(value=-0.05, reason="Unknown function"),
+        )
+    params_list = fn.get("parameters", [])
+    modifiers = fn.get("modifiers", [])
+    lines = [
+        f"Function   : {fn.get('signature', fn_name)}",
+        f"Visibility : {fn.get('visibility', 'unknown')}",
+        f"Modifiers  : {', '.join(modifiers) if modifiers else 'none'}",
+    ]
+    if params_list:
+        lines.append("Parameters :")
+        for p in params_list:
+            lines.append(f"  {p['type']} {p['name']} — {p.get('description','')}")
+    else:
+        lines.append("Parameters : none")
+    lines.append(f"Returns    : {fn.get('returns','') or 'void'}")
+    lines.append(f"Summary    : {fn.get('comment','')}")
+    return "\n".join(lines), Reward(value=-0.05, reason="get_function_metadata cost")
+def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FUNCTION_CODE action."""
+    fn_name = params.get("function_name", "")
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    fn = get_function_by_name(ctx._contract, fn_name)
+    if fn is None:
+        return (
+            f"Function '{fn_name}' not found. "
+            f"Available: {list_function_names(ctx._contract)}",
+            Reward(value=-0.10, reason="Unknown function — extra penalty"),
+        )
+    code = fn.get("code", "// no code available")
+    return (
+        f"// {fn_name}\n{code}",
+        Reward(value=-0.10, reason="get_function_code cost"),
+    )
+def get_state_variable(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_STATE_VARIABLE action."""
+    var_name = params.get("variable_name", "")
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    if not var_name:
+        names = list_state_variable_names(ctx._contract)
+        return (
+            f"State variables: {', '.join(names)}",
+            Reward(value=-0.05, reason="Listed state variables"),
+        )
+    sv = get_state_variable_by_name(ctx._contract, var_name)
+    if sv is None:
+        return (
+            f"Variable '{var_name}' not found.",
+            Reward(value=-0.05, reason="Unknown state variable"),
+        )
+    return (
+        f"{sv['type']} {sv['visibility']} {sv['name']}: {sv.get('description','')}",
+        Reward(value=-0.05, reason="get_state_variable cost"),
+    )
+def get_call_graph(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_CALL_GRAPH action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    cg = ctx._contract.get("call_graph", {})
+    cg_str = "; ".join(
+        f"{fn} → [{', '.join(callees)}]" for fn, callees in cg.items()
+    )
+    return (
+        f"Call graph: {cg_str}",
+        Reward(value=-0.08, reason="get_call_graph cost"),
+    )
+# TODO: Rework this handler — property_formal doesn't exist, so the lookup below always falls back to "No formal specification available for this property."
+def get_formalized_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle GET_FORMALIZED_PROPERTY action."""
+    if ctx._is_repeated(qkey):
+        return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
+    formal = ctx._target_fn.get("task3", {}).get("property_formal", "")
+    if not formal:
+        formal = "No formal specification available for this property."
+    return (
+        f"Formal property:\n{formal}",
+        Reward(value=-0.03, reason="get_formalized_property cost"),
+    )
+def submit_function(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
+    """Handle SUBMIT_FUNCTION action."""
+    if ctx._submitted:
+        return (
+            "❌ You have already submitted for this episode. "
+            "Only ONE submission is allowed.",
+            Reward(value=-1.0, reason="Second submit_function attempt", partial=False),
+        )
+    fn_name = params.get("function_name", "").strip()
+    if not fn_name:
+        return (
+            "submit_function requires 'function_name' in params.",
+            Reward(value=-0.5, reason="Malformed submission"),
+        )
+    ctx._submitted = True
+    ctx._done = True
+    score, reward_val = ctx._grader.grade_and_reward(fn_name)
+    correct = ctx._grader.get_canonical_answer()
+    if score >= 0.9:
+        msg = (
+            f"✅ CORRECT! '{fn_name}' is the function that violates the property. "
+            f"Score: 1.0 → Reward: +{reward_val:.1f}"
+        )
+    elif score >= 0.2:
+        msg = (
+            f"🟡 PARTIAL. '{fn_name}' is a subfunction of the target — "
+            f"closely related but not the primary rule-breaker. "
+            f"Score: 0.3 → Reward: +{reward_val:.1f}. "
+            f"Correct answer: '{correct['target_function']}'."
+        )
+    else:
+        msg = (
+            f"❌ INCORRECT. '{fn_name}' does not violate the property. "
+            f"Score: 0.0 → Reward: {reward_val:.1f}. "
+            f"Correct answer: '{correct['target_function']}'."
+        )
+    return msg, Reward(
+        value=reward_val,
+        reason=f"submit_function score={score:.1f}",
+        partial=False,
+    )
+def unknown_action(ctx: Any, qkey: str, params: Dict, action_type: str) -> Tuple[str, Reward]:
+    """Fallback for unknown actions."""
+    return (
+        f"Unknown action '{action_type}'. Valid: {[a.value for a in ActionType]}",
+        Reward(value=-0.10, reason="Unknown action"),
+    )

tasks/task3/environment.py CHANGED Viewed

@@ -35,15 +35,9 @@ from __future__ import annotations
 import random
 from typing import Any, Dict, List, Optional, Set
-from data.data_loader import (
-    load_contracts,
-    sample_task3_episode,
-    get_function_by_name,
-    get_state_variable_by_name,
-    list_function_names,
-    list_state_variable_names,
-)
 from env.base_env import BaseEnv
 from env.schemas import (
     Action,
@@ -188,163 +182,25 @@ class Task3Environment(BaseEnv):
             return True
         self._seen.add(key)
         return False
     def _dispatch(self, action: Action) -> tuple[str, Reward]:
-        at     = action.action_type
         params = action.params
-        qkey   = self._qkey(at, params)
-        # ── list_functions ────────────────────────────────────────────────────
-        if at == ActionType.LIST_FUNCTIONS:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            names = list_function_names(self._contract)
-            return (
-                f"Functions in {self._contract['contract_name']}: {', '.join(names)}",
-                Reward(value=-0.05, reason="list_functions cost"),
-            )
-        # ── get_function_metadata ─────────────────────────────────────────────
-        if at == ActionType.GET_FUNCTION_METADATA:
-            fn_name = params.get("function_name", "")
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            fn = get_function_by_name(self._contract, fn_name)
-            if fn is None:
-                return (
-                    f"Function '{fn_name}' not found. "
-                    f"Available: {list_function_names(self._contract)}",
-                    Reward(value=-0.05, reason="Unknown function"),
-                )
-            params_list = fn.get("parameters", [])
-            modifiers   = fn.get("modifiers", [])
-            lines = [
-                f"Function   : {fn.get('signature', fn_name)}",
-                f"Visibility : {fn.get('visibility', 'unknown')}",
-                f"Modifiers  : {', '.join(modifiers) if modifiers else 'none'}",
-            ]
-            if params_list:
-                lines.append("Parameters :")
-                for p in params_list:
-                    lines.append(f"  {p['type']} {p['name']} — {p.get('description','')}")
-            else:
-                lines.append("Parameters : none")
-            lines.append(f"Returns    : {fn.get('returns','') or 'void'}")
-            lines.append(f"Summary    : {fn.get('comment','')}")
-            return "\n".join(lines), Reward(value=-0.05, reason="get_function_metadata cost")
-        # ── get_function_code ─────────────────────────────────────────────────
-        if at == ActionType.GET_FUNCTION_CODE:
-            fn_name = params.get("function_name", "")
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            fn = get_function_by_name(self._contract, fn_name)
-            if fn is None:
-                return (
-                    f"Function '{fn_name}' not found. "
-                    f"Available: {list_function_names(self._contract)}",
-                    Reward(value=-0.10, reason="Unknown function — extra penalty"),
-                )
-            code = fn.get("code", "// no code available")
-            return (
-                f"// {fn_name}\n{code}",
-                Reward(value=-0.10, reason="get_function_code cost"),
-            )
-        # ── get_state_variables ───────────────────────────────────────────────
-        if at == ActionType.GET_STATE_VARIABLE:
-            var_name = params.get("variable_name", "")
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            if not var_name:
-                names = list_state_variable_names(self._contract)
-                return (
-                    f"State variables: {', '.join(names)}",
-                    Reward(value=-0.05, reason="Listed state variables"),
-                )
-            sv = get_state_variable_by_name(self._contract, var_name)
-            if sv is None:
-                return (
-                    f"Variable '{var_name}' not found.",
-                    Reward(value=-0.05, reason="Unknown state variable"),
-                )
-            return (
-                f"{sv['type']} {sv['visibility']} {sv['name']}: {sv.get('description','')}",
-                Reward(value=-0.05, reason="get_state_variable cost"),
-            )
-        # ── get_call_graph ────────────────────────────────────────────────────
-        if at == ActionType.GET_CALL_GRAPH:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            cg  = self._contract.get("call_graph", {})
-            cg_str = "; ".join(
-                f"{fn} → [{', '.join(callees)}]" for fn, callees in cg.items()
-            )
-            return (
-                f"Call graph: {cg_str}",
-                Reward(value=-0.08, reason="get_call_graph cost"),
-            )
-        # ── get_formalized_property ───────────────────────────────────────────
-        if at == ActionType.GET_FORMALIZED_PROPERTY:
-            if self._is_repeated(qkey):
-                return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
-            formal = self._target_fn.get("task3", {}).get("property_formal", "")
-            if not formal:
-                formal = "No formal specification available for this property."
-            return (
-                f"Formal property:\n{formal}",
-                Reward(value=-0.03, reason="get_formalized_property cost"),
-            )
-        # ── submit_function ───────────────────────────────────────────────────
-        if at == ActionType.SUBMIT_FUNCTION:
-            if self._submitted:
-                return (
-                    "❌ You have already submitted for this episode. "
-                    "Only ONE submission is allowed.",
-                    Reward(value=-1.0, reason="Second submit_function attempt", partial=False),
-                )
-            fn_name = params.get("function_name", "").strip()
-            if not fn_name:
-                return (
-                    "submit_function requires 'function_name' in params.",
-                    Reward(value=-0.5, reason="Malformed submission"),
-                )
-            self._submitted = True
-            self._done      = True
-            score, reward_val = self._grader.grade_and_reward(fn_name)
-            correct = self._grader.get_canonical_answer()
-            if score >= 0.9:
-                msg = (
-                    f"✅ CORRECT! '{fn_name}' is the function that violates the property. "
-                    f"Score: 1.0 → Reward: +{reward_val:.1f}"
-                )
-            elif score >= 0.2:
-                msg = (
-                    f"🟡 PARTIAL. '{fn_name}' is a subfunction of the target — "
-                    f"closely related but not the primary rule-breaker. "
-                    f"Score: 0.3 → Reward: +{reward_val:.1f}. "
-                    f"Correct answer: '{correct['target_function']}'."
-                )
-            else:
-                msg = (
-                    f"❌ INCORRECT. '{fn_name}' does not violate the property. "
-                    f"Score: 0.0 → Reward: {reward_val:.1f}. "
-                    f"Correct answer: '{correct['target_function']}'."
-                )
-            return msg, Reward(
-                value=reward_val,
-                reason=f"submit_function score={score:.1f}",
-                partial=False,
-            )
-        # ── unknown action ────────────────────────────────────────────────────
-        return (
-            f"Unknown action '{at}'. Valid: {[a.value for a in AVAILABLE_ACTIONS]}",
-            Reward(value=-0.10, reason="Unknown action"),
-        )

 import random
 from typing import Any, Dict, List, Optional, Set
+import actions
+from data.data_loader import load_contracts, sample_task3_episode
 from env.base_env import BaseEnv
 from env.schemas import (
     Action,
             return True
         self._seen.add(key)
         return False
     def _dispatch(self, action: Action) -> tuple[str, Reward]:
         """Route ``action`` to its handler function in the ``actions`` module.

         The query key is computed from the action type and params via
         ``self._qkey`` and handed to the handler together with this
         environment instance and the params. Action types without a
         registered handler are passed to ``actions.unknown_action``.

         Returns:
             Whatever the selected handler returns (annotated as a
             ``(message, Reward)`` pair).
         """
         # NOTE(review): `actions` comes from a bare `import actions`; the same
         # commit adds both tasks/task2/actions.py and tasks/task3/actions.py,
         # so confirm this name resolves to the task3 module at runtime.
         kind = action.action_type
         params = action.params
         qkey = self._qkey(kind, params)

         # ActionType -> handler; every handler is called as (env, qkey, params).
         routes = {
             ActionType.LIST_FUNCTIONS: actions.list_functions,
             ActionType.GET_FUNCTION_METADATA: actions.get_function_metadata,
             ActionType.GET_FUNCTION_CODE: actions.get_function_code,
             ActionType.GET_STATE_VARIABLE: actions.get_state_variable,
             ActionType.GET_CALL_GRAPH: actions.get_call_graph,
             ActionType.GET_FORMALIZED_PROPERTY: actions.get_formalized_property,
             ActionType.SUBMIT_FUNCTION: actions.submit_function,
         }

         if kind in routes:
             return routes[kind](self, qkey, params)
         return actions.unknown_action(self, qkey, params, kind)

utils/prompts.py ADDED Viewed

	@@ -0,0 +1,103 @@

# Prompt text for Task 1: locate the single vulnerable function in a contract
# and name its vulnerability type. Lists the JSON actions the model may emit.
T1_SYSTEM = """You are an expert Solidity smart contract security auditor.
Given a contract, identify the ONE vulnerable function and its vulnerability type.
## Actions (choose ONE per turn, respond with JSON only):
{"action": "list_functions",       "params": {}}
{"action": "get_function_code",    "params": {"function_name": "<name>"}}
{"action": "get_function_summary", "params": {"function_name": "<name>"}}
{"action": "get_file_metadata",    "params": {}}
{"action": "get_state_variable",   "params": {"variable_name": "<name>"}}
{"action": "get_call_graph",       "params": {}}
{"action": "submit",               "params": {"function_name": "<name>", "vulnerability_type": "<2-3 words>"}}
## Strategy:
1. list_functions first to see the attack surface
2. Inspect suspicious functions (withdraw, drain, buy, stake, claim, setPrice, bid, finalize)
3. Look for: reentrancy, missing access control, integer overflow, tx.origin, front-running,
   timestamp dependence, denial of service, unchecked return value
4. Submit when confident
Respond ONLY with valid JSON. No explanation, no markdown."""

# Prompt text for Task 2: write a natural-language property for one function.
# The action list must contain only JSON action lines; stray Python source
# that had been pasted into the middle of this literal has been removed so it
# is no longer sent as part of the prompt.
T2_SYSTEM = """You are a formal methods engineer specialising in Solidity smart contracts.
You will be shown a specific Solidity function. Your task is to write a precise
natural-language property (invariant / postcondition) that describes what the
function guarantees when it succeeds.
A good property covers:
  - What state changes (balances, counters, flags)
  - What assets are transferred (ETH, tokens, NFTs)
  - What return value is produced (for view functions)
  - Under what conditions it reverts
## Actions (respond with JSON only, ONE action per turn):
{"action": "get_function_code",     "params": {}}
{"action": "get_function_natspec",  "params": {}}
{"action": "get_file_natspec",      "params": {}}
{"action": "get_related_functions", "params": {}}
{"action": "get_io",                "params": {}}
{"action": "get_similar_rule",      "params": {}}
{"action": "submit_property",       "params": {"property": "<your full property text>"}}
## Rules:
- You have ONE submit_property attempt. Make it count.
- Use get_function_natspec and get_io first — they give the most signal.
- get_similar_rule costs more (-0.20) but shows a parallel property from another contract.
- Write 2–4 sentences. Be specific about variable names and amounts.
- Do NOT guess — read the code first.
Respond ONLY with valid JSON. No markdown, no explanation."""

# Prompt text for Task 3: find the single function that violates a property
# stated in natural English. Lists the JSON actions the model may emit.
T3_SYSTEM = """You are a smart contract security auditor checking rule compliance.
You are given a Solidity contract and a property (rule) in natural English.
Your task is to find the ONE function that violates this property.
## Actions (respond with JSON only, ONE action per turn):
{"action": "list_functions",          "params": {}}
{"action": "get_formalized_property", "params": {}}
{"action": "get_function_metadata",   "params": {"function_name": "<n>"}}
{"action": "get_function_code",       "params": {"function_name": "<n>"}}
{"action": "get_state_variable",      "params": {"variable_name": "<n>"}}
{"action": "get_call_graph",          "params": {}}
{"action": "submit_function",         "params": {"function_name": "<n>"}}
## Strategy:
1. Read the property shown as property_english in the observation.
2. list_functions to survey candidates.
3. get_formalized_property for the precise pre/post-condition (cheap: -0.03).
4. get_function_code on the 1-2 most suspicious functions.
5. submit_function when confident — ONE attempt only.
Clues: missing require, no access modifier, unchecked external call, unbounded array,
tx.origin auth, integer overflow, timestamp manipulation, reentrancy ordering.
Respond ONLY with valid JSON. No markdown, no explanation."""