ajaxwin committed on
Commit
cf983b8
Β·
1 Parent(s): e8c9acc

task2 reviewed, semanticmatcher implemented

Browse files
.gitignore CHANGED
@@ -11,3 +11,4 @@ build/
11
  baseline_scores.json
12
  *.log
13
  .pytest_cache/
 
 
11
  baseline_scores.json
12
  *.log
13
  .pytest_cache/
14
+ MySolution.md
data/data_loader.py CHANGED
@@ -110,7 +110,7 @@ def get_all_property_entries(
110
  entries = []
111
  for contract in contracts:
112
  for fn in contract.get("functions", []):
113
- if fn.get("property") is not None:
114
  entries.append((contract, fn))
115
  return entries
116
 
@@ -155,48 +155,6 @@ def get_related_functions(
155
 
156
  return sorted(related)
157
 
158
-
159
- # ! Function is completely wrong
160
-
161
- def get_similar_rule(
162
- contracts: List[Dict[str, Any]],
163
- current_contract_name: str,
164
- current_function_name: str,
165
- ) -> Optional[Dict[str, Any]]:
166
- """
167
- Returns the similar_rule hint stored in the target function's property field,
168
- enriched with the referenced function's natspec if available.
169
-
170
- Returns a dict with keys: contract_name, function_name, property_hint, natspec.
171
- Returns None if no similar_rule is defined.
172
- """
173
- # Find target function
174
- for contract in contracts:
175
- if contract["contract_name"] == current_contract_name:
176
- fn = get_function_by_name(contract, current_function_name)
177
- if fn and fn.get("property") and fn["property"].get("similar_rule"): # ! There is no property or similar_rule field
178
- sr = fn["property"]["similar_rule"]
179
- # Look up the referenced function's natspec
180
- for c2 in contracts:
181
- if c2["contract_name"] == sr["contract_name"]:
182
- ref_fn = get_function_by_name(c2, sr["function_name"])
183
- if ref_fn:
184
- return {
185
- "contract_name": sr["contract_name"],
186
- "function_name": sr["function_name"],
187
- "property_hint": sr["property_hint"],
188
- "natspec": ref_fn.get("natspec", ""),
189
- }
190
- # Referenced function not found β€” return hint only
191
- return {
192
- "contract_name": sr["contract_name"],
193
- "function_name": sr["function_name"],
194
- "property_hint": sr["property_hint"],
195
- "natspec": "",
196
- }
197
- return None
198
-
199
-
200
  # ────────────────────────────────────────────────────────────────
201
  # Task 3 helpers
202
  # ────────────────────────────────────────────────────────────────
 
110
  entries = []
111
  for contract in contracts:
112
  for fn in contract.get("functions", []):
113
+ if fn.get("property", None) is not None and fn.get("vulnerable", False) is False:
114
  entries.append((contract, fn))
115
  return entries
116
 
 
155
 
156
  return sorted(related)
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  # ────────────────────────────────────────────────────────────────
159
  # Task 3 helpers
160
  # ────────────────────────────────────────────────────────────────
data/properties.csv ADDED
The diff for this file is too large to render. See raw diff
 
env/schemas.py CHANGED
@@ -38,7 +38,7 @@ class ActionType(str, Enum):
38
  GET_FILE_NATSPEC = "get_file_natspec" # -0.03
39
  GET_FUNCTION_NATSPEC = "get_function_natspec" # -0.08
40
  GET_RELATED_FUNCTIONS = "get_related_functions" # -0.06
41
- GET_IO = "get_io" # -0.04
42
  SUBMIT_PROPERTY = "submit_property" # scored 0–5, one attempt
43
 
44
  # ── Task 3 – Rule Checker ────────────────────────────────────────────────
 
38
  GET_FILE_NATSPEC = "get_file_natspec" # -0.03
39
  GET_FUNCTION_NATSPEC = "get_function_natspec" # -0.08
40
  GET_RELATED_FUNCTIONS = "get_related_functions" # -0.06
41
+ GET_SIGNATURE = "get_signature" # -0.04
42
  SUBMIT_PROPERTY = "submit_property" # scored 0–5, one attempt
43
 
44
  # ── Task 3 – Rule Checker ────────────────────────────────────────────────
inference.py CHANGED
@@ -32,7 +32,7 @@ from tasks.task1.environment import Task1Environment
32
  from tasks.task2.environment import Task2Environment
33
  from tasks.task3.environment import Task3Environment
34
  from env.schemas import Action, ActionType
35
- from utils.prompts import T1_SYSTEM, T2_SYSTEM, T3_SYSTEM
36
 
37
  # ─────────────────────────────────────────────────────────────────────────────
38
  # Configuration
 
32
  from tasks.task2.environment import Task2Environment
33
  from tasks.task3.environment import Task3Environment
34
  from env.schemas import Action, ActionType
35
+ from utils import T1_SYSTEM, T2_SYSTEM, T3_SYSTEM
36
 
37
  # ─────────────────────────────────────────────────────────────────────────────
38
  # Configuration
requirements.txt CHANGED
@@ -5,3 +5,8 @@ openai==1.51.0
5
  httpx==0.27.2
6
  python-multipart==0.0.9
7
  pyyaml==6.0.2
 
 
 
 
 
 
5
  httpx==0.27.2
6
  python-multipart==0.0.9
7
  pyyaml==6.0.2
8
+ pandas==2.2.2
9
+ numpy==2.1.1
10
+ scikit-learn==1.5.0
11
+ sentence-transformers==3.0.1
12
+ nltk==3.9.4
tasks/task1/grader.py CHANGED
@@ -8,15 +8,15 @@ Deterministic grader. Score range: 0.0 – 1.0
8
  0.0 – wrong function name
9
  """
10
  from __future__ import annotations
11
- from typing import Dict, List, Optional
12
- from utils.matcher import match_strings
13
  from data.data_loader import load_vulnerabilities
14
 
15
  def match_vuln_keywords(submitted: str, expected: str) -> bool:
16
  """Checks if the submitted vulnerability type matches the expected one using keyword matching."""
17
  for types in load_vulnerabilities():
18
  if types["vulnerability"] == expected:
19
- return match_strings(types["terms"], submitted)
20
  return False
21
 
22
  class Task1Grader:
 
8
  0.0 – wrong function name
9
  """
10
  from __future__ import annotations
11
+ from typing import Dict
12
+ from utils import SemanticMatcher
13
  from data.data_loader import load_vulnerabilities
14
 
15
  def match_vuln_keywords(submitted: str, expected: str) -> bool:
16
  """Checks if the submitted vulnerability type matches the expected one using keyword matching."""
17
  for types in load_vulnerabilities():
18
  if types["vulnerability"] == expected:
19
+ return SemanticMatcher().match(types["terms"], submitted)
20
  return False
21
 
22
  class Task1Grader:
tasks/task2/actions.py CHANGED
@@ -1,24 +1,34 @@
1
  """
2
  Actions for Task 2: Property Inference.
 
 
 
 
 
3
  """
4
 
5
  from typing import Any, Dict, Tuple
6
- from data.data_loader import get_function_by_name, get_related_functions, get_similar_rule
 
7
  from env.schemas import ActionType, Reward
8
 
 
 
 
 
9
  def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
10
  """Handle GET_FUNCTION_CODE action."""
11
  if ctx._is_repeated(qkey):
12
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
13
 
14
  fn = ctx._target_fn
15
- name = fn["name"]
16
  code = fn.get("code", "// no code available")
17
  return (
18
- f"// {name}\n{code}",
19
  Reward(value=-0.06, reason="get_function_code cost"),
20
  )
21
 
 
22
 
23
  def get_function_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
24
  """Handle GET_FUNCTION_NATSPEC action."""
@@ -69,56 +79,28 @@ def get_related_functions_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str
69
  return text, Reward(value=-0.06, reason="get_related_functions cost")
70
 
71
 
72
- def get_io(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
73
- """Handle GET_IO action."""
74
  if ctx._is_repeated(qkey):
75
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
76
 
77
  fn = ctx._target_fn
78
- name = fn["name"]
79
- params_list = fn.get("parameters", [])
80
- returns = fn.get("returns", "") or "void"
81
- out_prop = fn.get("output_property", "")
82
- visibility = fn.get("visibility", "")
83
- modifiers = fn.get("modifiers", [])
84
-
85
- lines = [f"Function: {fn.get('signature', name)}"]
86
- lines.append(f"Visibility: {visibility}" + (f" Modifiers: {', '.join(modifiers)}" if modifiers else ""))
87
- if params_list:
88
- lines.append("Parameters:")
89
- for p in params_list:
90
- lines.append(f" β€’ {p['type']} {p['name']}: {p.get('description','')}")
91
- else:
92
- lines.append("Parameters: none" + (" (payable)" if "payable" in fn.get("code", "") else ""))
93
- lines.append(f"Returns: {returns}")
94
- if out_prop:
95
- lines.append(f"Expected behaviour: {out_prop}")
96
- return "\n".join(lines), Reward(value=-0.04, reason="get_io cost")
97
 
98
- # ! Wrong Function, there is no similar_rule field in the dataset. This function will always return "No similar rule available for this function."
99
 
100
  def get_similar_rule_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
101
  """Handle GET_SIMILAR_RULE action."""
102
  if ctx._is_repeated(qkey):
103
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
104
 
105
- sr = get_similar_rule(
106
- ctx._contracts,
107
- ctx._contract["contract_name"],
108
- ctx._target_fn["name"],
109
- )
110
- if sr is None:
111
  return (
112
  "No similar rule available for this function.",
113
  Reward(value=-0.20, reason="get_similar_rule cost (not found)"),
114
  )
115
- lines = [
116
- f"Similar property from {sr['contract_name']}.{sr['function_name']}():",
117
- f" {sr['property_hint']}",
118
- ]
119
- if sr.get("natspec"):
120
- lines.append(f"\nFunction NatSpec:\n {sr['natspec']}")
121
- return "\n".join(lines), Reward(value=-0.20, reason="get_similar_rule cost")
122
 
123
 
124
  def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
@@ -129,6 +111,7 @@ def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
129
  "Only one submission is allowed.",
130
  Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
131
  )
 
132
  submitted_text = params.get("property", "").strip()
133
  if not submitted_text:
134
  return (
@@ -138,28 +121,10 @@ def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
138
 
139
  ctx._submitted = True
140
  ctx._done = True
 
 
141
 
142
- score = ctx._grader.grade(submitted_text)
143
- reward = ctx._grader.reward_for_score(score)
144
- bd = ctx._grader.breakdown(submitted_text)
145
-
146
- pct = int(score * 100)
147
- if score >= 0.85:
148
- emoji, label = "βœ…", "EXCELLENT"
149
- elif score >= 0.60:
150
- emoji, label = "🟑", "GOOD"
151
- elif score >= 0.35:
152
- emoji, label = "🟠", "PARTIAL"
153
- else:
154
- emoji, label = "❌", "POOR"
155
-
156
- msg = (
157
- f"{emoji} {label} β€” Score: {score:.2f}/1.00 β†’ Reward: {reward:.2f}/5.00 ({pct}%)\n"
158
- f"Key concepts matched : {len(bd['key_matched'])}/{len(bd['key_matched'])+len(bd['key_missed'])} "
159
- f"{bd['key_matched']}\n"
160
- f"Bonus concepts matched : {len(bd['bonus_matched'])}/{len(bd['bonus_matched'])+len(bd['bonus_missed'])} "
161
- f"{bd['bonus_matched']}"
162
- )
163
  return msg, Reward(
164
  value=reward,
165
  reason=f"Property submission score={score:.3f}",
 
1
  """
2
  Actions for Task 2: Property Inference.
3
+ Defines the logic for each action type that the agent can take in the environment, including
4
+ querying function code, NatSpec, related functions, and submitting the inferred property.
5
+ ctx: context object containing episode state and data
6
+ qkey: a unique key representing the specific query (used for tracking repeated queries)
7
+ params: parameters for the action, such as the submitted property text for SUBMIT_PROPERTY
8
  """
9
 
10
  from typing import Any, Dict, Tuple
11
+ from data.data_loader import get_function_by_name, get_related_functions
12
+ from utils import PropertyRetriever
13
  from env.schemas import ActionType, Reward
14
 
15
+ PropertyRetrieverInstance = PropertyRetriever() # Load once at module level
16
+
17
+ # TODO: Can separate into signature, visiblity
18
+
19
  def get_function_code(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
20
  """Handle GET_FUNCTION_CODE action."""
21
  if ctx._is_repeated(qkey):
22
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
23
 
24
  fn = ctx._target_fn
 
25
  code = fn.get("code", "// no code available")
26
  return (
27
+ code,
28
  Reward(value=-0.06, reason="get_function_code cost"),
29
  )
30
 
31
+ # TODO: Can separate comment and output_property(output_comment)
32
 
33
  def get_function_natspec(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
34
  """Handle GET_FUNCTION_NATSPEC action."""
 
79
  return text, Reward(value=-0.06, reason="get_related_functions cost")
80
 
81
 
82
+ def get_signature(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
83
+ """Handle GET_SIGNATURE action."""
84
  if ctx._is_repeated(qkey):
85
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
86
 
87
  fn = ctx._target_fn
88
+ sig = fn.get("signature")
89
+ return sig, Reward(value=-0.04, reason="get_signature cost")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
 
91
 
92
  def get_similar_rule_action(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
93
  """Handle GET_SIMILAR_RULE action."""
94
  if ctx._is_repeated(qkey):
95
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
96
 
97
+ similar_rule = PropertyRetrieverInstance.get_similar_property(ctx._target_fn["code"])
98
+ if similar_rule is None:
 
 
 
 
99
  return (
100
  "No similar rule available for this function.",
101
  Reward(value=-0.20, reason="get_similar_rule cost (not found)"),
102
  )
103
+ return similar_rule, Reward(value=-0.20, reason="get_similar_rule cost")
 
 
 
 
 
 
104
 
105
 
106
  def submit_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
 
111
  "Only one submission is allowed.",
112
  Reward(value=-1.0, reason="Second submit_property attempt", partial=False),
113
  )
114
+
115
  submitted_text = params.get("property", "").strip()
116
  if not submitted_text:
117
  return (
 
121
 
122
  ctx._submitted = True
123
  ctx._done = True
124
+ score, confidence = ctx._grader.grade(submitted_text)
125
+ reward = round(score * 5.0, 4)
126
 
127
+ msg = f'Score: {score:.2f}/1.00 β†’ Confidence: {confidence:.2f}\n'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  return msg, Reward(
129
  value=reward,
130
  reason=f"Property submission score={score:.3f}",
tasks/task2/environment.py CHANGED
@@ -50,7 +50,7 @@ AVAILABLE_ACTIONS = [
50
  ActionType.GET_FUNCTION_NATSPEC,
51
  ActionType.GET_FILE_NATSPEC,
52
  ActionType.GET_RELATED_FUNCTIONS,
53
- ActionType.GET_IO,
54
  ActionType.GET_SIMILAR_RULE,
55
  ActionType.SUBMIT_PROPERTY,
56
  ]
@@ -85,7 +85,7 @@ class Task2Environment(BaseEnv):
85
  )
86
  self._grader = Task2Grader(
87
  function_name=self._target_fn["name"],
88
- property_data=self._target_fn["property"],
89
  )
90
  self._step_count = 0
91
  self._cum_reward = 0.0
@@ -184,13 +184,13 @@ class Task2Environment(BaseEnv):
184
  # Mapping from ActionType to handler function
185
  # Each handler expects (ctx, qkey, params) and returns (str, Reward)
186
  handlers = {
187
- ActionType.GET_FUNCTION_CODE: actions.get_function_code,
188
- ActionType.GET_FUNCTION_NATSPEC: actions.get_function_natspec,
189
- ActionType.GET_FILE_NATSPEC: actions.get_file_natspec,
190
- ActionType.GET_RELATED_FUNCTIONS: actions.get_related_functions_action,
191
- ActionType.GET_IO: actions.get_io,
192
- ActionType.GET_SIMILAR_RULE: actions.get_similar_rule_action,
193
- ActionType.SUBMIT_PROPERTY: actions.submit_property,
194
  }
195
 
196
  handler = handlers.get(at)
 
50
  ActionType.GET_FUNCTION_NATSPEC,
51
  ActionType.GET_FILE_NATSPEC,
52
  ActionType.GET_RELATED_FUNCTIONS,
53
+ ActionType.GET_SIGNATURE,
54
  ActionType.GET_SIMILAR_RULE,
55
  ActionType.SUBMIT_PROPERTY,
56
  ]
 
85
  )
86
  self._grader = Task2Grader(
87
  function_name=self._target_fn["name"],
88
+ property=self._target_fn["property"],
89
  )
90
  self._step_count = 0
91
  self._cum_reward = 0.0
 
184
  # Mapping from ActionType to handler function
185
  # Each handler expects (ctx, qkey, params) and returns (str, Reward)
186
  handlers = {
187
+ ActionType.GET_FUNCTION_CODE: actions.get_function_code,
188
+ ActionType.GET_FUNCTION_NATSPEC: actions.get_function_natspec,
189
+ ActionType.GET_FILE_NATSPEC: actions.get_file_natspec,
190
+ ActionType.GET_RELATED_FUNCTIONS: actions.get_related_functions_action,
191
+ ActionType.GET_SIGNATURE: actions.get_signature,
192
+ ActionType.GET_SIMILAR_RULE: actions.get_similar_rule_action,
193
+ ActionType.SUBMIT_PROPERTY: actions.submit_property,
194
  }
195
 
196
  handler = handlers.get(at)
tasks/task2/grader.py CHANGED
@@ -2,101 +2,13 @@
2
  grader.py (Task 2 – Property Discovery)
3
  -----------------------------------------
4
  Deterministic scorer for natural-language property submissions.
5
-
6
- Score formula
7
- ─────────────
8
- key_phrases weight = 0.70
9
- bonus_phrases weight = 0.30
10
-
11
- score = 0.70 * (matched_key / total_key)
12
- + 0.30 * (matched_bonus / total_bonus)
13
-
14
- Phrase matching
15
- ───────────────
16
- A phrase is considered matched if ALL its words (after normalisation)
17
- appear in the submitted text. This is intentionally lenient β€” it
18
- doesn't require the words to be adjacent, so "balance increases by
19
- msg.value" is matched by "the caller's vault balance increases by the
20
- sent msg.value amount".
21
-
22
- Synonym expansion allows common paraphrases to also match
23
- (e.g. "caller" β†’ "msg.sender", "sender", "user").
24
-
25
- Terminal reward = score Γ— 5.0 (range: 0.0 – 5.0)
26
  One submission attempt per episode.
27
  """
28
 
29
  from __future__ import annotations
30
 
31
- import string
32
- from typing import Dict, List, Optional
33
-
34
-
35
- # ── Text normalisation ────────────────────────────────────────────────────────
36
-
37
- _PUNCT = str.maketrans("", "", string.punctuation)
38
-
39
-
40
- def _norm(text: str) -> str:
41
- """Lowercase, strip punctuation, collapse whitespace β†’ word string."""
42
- return " ".join(text.lower().translate(_PUNCT).split())
43
-
44
-
45
- def _word_set(text: str) -> set:
46
- """Return normalised set of words."""
47
- return set(_norm(text).split())
48
-
49
-
50
- # ── Synonym table ─────────────────────────────────────────────────────────────
51
- # Maps canonical word β†’ list of accepted synonyms (all lowercase, no punct).
52
-
53
- SYNONYMS: Dict[str, List[str]] = {
54
- "caller": ["caller", "sender", "user", "msgsender", "msg sender"],
55
- "balance": ["balance", "holdings", "amount held"],
56
- "increases": ["increases", "incremented", "added", "grows", "rise"],
57
- "decreases": ["decreases", "decremented", "reduced", "subtracted", "falls"],
58
- "transfers": ["transfers", "sends", "moved", "forwarded", "sent"],
59
- "reverts": ["reverts", "fails", "rejected", "throws", "require"],
60
- "zero": ["zero", "0", "nothing", "none", "empty"],
61
- "owner": ["owner", "admin", "authorized"],
62
- "entire": ["entire", "full", "whole", "all", "total"],
63
- "returned": ["returned", "sent back", "refunded", "transferred back"],
64
- "reset": ["reset", "zeroed", "set to zero", "cleared"],
65
- "only": ["only", "exclusively", "restricted"],
66
- "price": ["price", "cost", "rate"],
67
- "tokens": ["tokens", "token amount"],
68
- "rewards": ["rewards", "reward tokens", "accrued"],
69
- "staked": ["staked", "deposited", "locked"],
70
- "winner": ["winner", "winning bidder", "successful bidder"],
71
- }
72
-
73
-
74
- def _expand_words(phrase_words: List[str]) -> List[List[str]]:
75
- """
76
- For each word in the phrase, generate synonym variants.
77
- Returns a list of word-list variants to try.
78
- Only substitutes ONE word at a time to avoid combinatorial explosion.
79
- """
80
- variants = [phrase_words] # original
81
- for i, word in enumerate(phrase_words):
82
- if word in SYNONYMS:
83
- for syn in SYNONYMS[word]:
84
- syn_words = _norm(syn).split()
85
- new_variant = phrase_words[:i] + syn_words + phrase_words[i + 1:]
86
- variants.append(new_variant)
87
- return variants
88
-
89
-
90
- def _phrase_matched(text_words: set, phrase: str) -> bool:
91
- """
92
- True if ALL words in the phrase (or a synonym variant) appear in text_words.
93
- Uses word-set containment, not substring adjacency.
94
- """
95
- norm_words = _norm(phrase).split()
96
- for variant_words in _expand_words(norm_words):
97
- if all(w in text_words for w in variant_words):
98
- return True
99
- return False
100
 
101
 
102
  # ── Grader ────────────────────────────────────────────────────────────────────
@@ -108,64 +20,22 @@ class Task2Grader:
108
  Parameters
109
  ----------
110
  function_name : name of the target function
111
- property_data : the 'property' dict from the dataset
112
- Must have: natural_language, key_phrases, bonus_phrases
113
  """
114
 
115
- KEY_WEIGHT = 0.70
116
- BONUS_WEIGHT = 0.30
117
-
118
- def __init__(self, function_name: str, property_data: Dict) -> None:
119
  self.function_name = function_name
120
- self.natural_language = property_data.get("natural_language", "")
121
- self.key_phrases = property_data.get("key_phrases", [])
122
- self.bonus_phrases = property_data.get("bonus_phrases", [])
123
 
124
  # ── Public API ────────────────────────────────────────────────────────────
125
 
126
- def grade(self, submitted: str) -> float:
127
  """Deterministic score in [0.0, 1.0]."""
128
  if not submitted or not submitted.strip():
129
- return 0.0
130
- tw = _word_set(submitted)
131
- key_score = self._phrase_score(tw, self.key_phrases)
132
- bonus_score = self._phrase_score(tw, self.bonus_phrases)
133
- raw = self.KEY_WEIGHT * key_score + self.BONUS_WEIGHT * bonus_score
134
- return round(min(max(raw, 0.0), 1.0), 4)
135
-
136
- def reward_for_score(self, score: float) -> float:
137
- """Maps [0.0, 1.0] β†’ [0.0, 5.0]."""
138
- return round(score * 5.0, 4)
139
-
140
- def breakdown(self, submitted: str) -> Dict:
141
- """Detailed scoring breakdown for debugging."""
142
- tw = _word_set(submitted)
143
- key_hits = [p for p in self.key_phrases if _phrase_matched(tw, p)]
144
- bonus_hits = [p for p in self.bonus_phrases if _phrase_matched(tw, p)]
145
- score = self.grade(submitted)
146
- return {
147
- "score": score,
148
- "reward": self.reward_for_score(score),
149
- "key_matched": key_hits,
150
- "key_missed": [p for p in self.key_phrases if p not in key_hits],
151
- "bonus_matched": bonus_hits,
152
- "bonus_missed": [p for p in self.bonus_phrases if p not in bonus_hits],
153
- "key_score": self._phrase_score(tw, self.key_phrases),
154
- "bonus_score": self._phrase_score(tw, self.bonus_phrases),
155
- }
156
-
157
- def get_canonical_answer(self) -> Dict:
158
- """For debugging / logging only."""
159
- return {
160
- "function": self.function_name,
161
- "natural_language": self.natural_language,
162
- "key_phrases": self.key_phrases,
163
- }
164
-
165
- # ── Internal ──────────────────────────────────────────────────────────────
166
 
167
- def _phrase_score(self, text_words: set, phrases: List[str]) -> float:
168
- if not phrases:
169
- return 1.0
170
- matched = sum(1 for p in phrases if _phrase_matched(text_words, p))
171
- return matched / len(phrases)
 
2
  grader.py (Task 2 – Property Discovery)
3
  -----------------------------------------
4
  Deterministic scorer for natural-language property submissions.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  One submission attempt per episode.
6
  """
7
 
8
  from __future__ import annotations
9
 
10
+ from typing import Tuple
11
+ from utils import SemanticMatcher
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
  # ── Grader ────────────────────────────────────────────────────────────────────
 
20
  Parameters
21
  ----------
22
  function_name : name of the target function
23
+ property : the 'property' field from the target function's data
 
24
  """
25
 
26
+ def __init__(self, function_name: str, property: str) -> None:
 
 
 
27
  self.function_name = function_name
28
+ self.property = property
 
 
29
 
30
  # ── Public API ────────────────────────────────────────────────────────────
31
 
32
+ def grade(self, submitted: str) -> Tuple[float, str]:
33
  """Deterministic score in [0.0, 1.0]."""
34
  if not submitted or not submitted.strip():
35
+ return 0.0, "no_match"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ SemanticMatcherInstance = SemanticMatcher()
38
+ return (
39
+ SemanticMatcherInstance.matchscore(self.property, submitted),
40
+ SemanticMatcherInstance.confidence()
41
+ )
utils/__init__.py CHANGED
@@ -1,4 +1,6 @@
1
  # utils package
2
- from utils.matcher import match_strings
 
 
3
 
4
- _all__ = ["match_strings"]
 
1
  # utils package
2
+ from utils.semanticmatcher import SemanticMatcher
3
+ from utils.prompts import T1_SYSTEM, T2_SYSTEM, T3_SYSTEM
4
+ from utils.propertyretriever import PropertyRetriever
5
 
6
+ __all__ = ["SemanticMatcher", "T1_SYSTEM", "T2_SYSTEM", "T3_SYSTEM", "PropertyRetriever"]
utils/matcher.py DELETED
@@ -1,29 +0,0 @@
1
- from typing import List
2
-
3
- def match_strings(terms: List[str], phrase: str, threshold: float = 0.6) -> bool:
4
- """
5
- Determine if a short string (2-3 words) is likely present in a list of terms
6
- using token set similarity
7
-
8
- Args:
9
- terms: List of term strings (preferably lowercased).
10
- phrase: Input phrase (2-3 words, case-insensitive).
11
- threshold: Minimum Jaccard similarity to count as a match (default 0.6).
12
-
13
- Returns:
14
- True if any term in the list has token set similarity >= threshold.
15
- """
16
- phrase_lower = phrase.lower().strip()
17
- phrase_tokens = set(phrase_lower.split())
18
-
19
- for term in terms:
20
- term_tokens = set(term.split())
21
- # Jaccard similarity = size of intersection / size of union
22
- if not phrase_tokens or not term_tokens:
23
- continue
24
- intersection = phrase_tokens & term_tokens
25
- union = phrase_tokens | term_tokens
26
- similarity = len(intersection) / len(union)
27
- if similarity >= threshold:
28
- return True
29
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/propertyretriever.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PropertyRetriever class for finding similar properties based on code embeddings.
3
+ It loads a dataset of properties, computes embeddings for their critical code sections,
4
+ and provides a method to retrieve the most similar property given a new code snippet.
5
+ """
6
+
7
+ # ! Current data contains properties from contracts.json, making it more likely to find an exact match
8
+
9
+ import pandas as pd
10
+ import numpy as np
11
+ from sentence_transformers import SentenceTransformer
12
+ from sklearn.preprocessing import normalize
13
+ import os
14
+
15
+ DATA_DIR = os.path.join(os.path.dirname(__file__))
16
+ DEFAULT_CSV_PATH = os.path.join(DATA_DIR, "properties.csv")
17
+ SIMILARITY_THRESHOLD = 0.8 # Adjust as needed based on validation
18
+
19
+ # -------------------------------------------------------------------
20
+ # 1. Load the dataset and build the vector database (offline/once)
21
+ # -------------------------------------------------------------------
22
+
23
+ class PropertyRetriever:
24
+ def __init__(self):
25
+ """
26
+ csv_path : path to the CSV file containing the columns:
27
+ SpecHash, SpecIndex, Type, Name, StartLine, EndLine,
28
+ MethodsInRule, RuleContent, RelatedFunctions,
29
+ FunctionBodies, FilePath, Project, ContractCode,
30
+ StateVarAssignment, RuleContentNL, Funcitonality
31
+ similarity_threshold : minimum dot product to consider a match
32
+ """
33
+ self.df = pd.read_csv(DEFAULT_CSV_PATH)
34
+ self.threshold = SIMILARITY_THRESHOLD
35
+
36
+ # Use a lightweight, open‑source embedding model
37
+ self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
38
+
39
+ # Extract "critical code" from each property (use FunctionBodies)
40
+ # Fallback to RelatedFunctions or RuleContent if FunctionBodies is missing
41
+ self.critical_codes = []
42
+ for idx, row in self.df.iterrows():
43
+ code = row.get('FunctionBodies', '')
44
+ if pd.isna(code) or code.strip() == '':
45
+ # Fallback: concatenate RelatedFunctions or use RuleContent
46
+ code = row.get('RelatedFunctions', '')
47
+ if pd.isna(code) or code.strip() == '':
48
+ code = row.get('RuleContent', '')
49
+ self.critical_codes.append(str(code))
50
+
51
+ # Compute embeddings for all critical codes
52
+ self.embeddings = self.embedder.encode(self.critical_codes, show_progress_bar=True)
53
+ # Normalize for dot product = cosine similarity
54
+ self.embeddings = normalize(self.embeddings, norm='l2')
55
+
56
+ def get_similar_property(self, input_code: str) -> str:
57
+ """
58
+ Given a Solidity function code string, return the most similar property
59
+ (RuleContent) from the dataset, or an empty string if none exceeds the threshold.
60
+ """
61
+ if not input_code or not isinstance(input_code, str):
62
+ return ""
63
+
64
+ # Step β‘‘: Embed the subject code
65
+ query_emb = self.embedder.encode([input_code])
66
+ query_emb = normalize(query_emb, norm='l2')
67
+
68
+ # Step β‘’: Compute dot products with all database vectors
69
+ similarities = np.dot(self.embeddings, query_emb.T).flatten()
70
+
71
+ # Find the best match above threshold
72
+ best_idx = np.argmax(similarities)
73
+ best_score = similarities[best_idx]
74
+
75
+ if best_score >= self.threshold:
76
+ # Return the property content (RuleContent) of the best match
77
+ return self.df.iloc[best_idx]['RuleContentNL']
78
+ else:
79
+ # No sufficiently similar property found
80
+ return ""
utils/semanticmatcher.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ semanticmatcher.py
3
+ ====================
4
+ Deterministic semantic string matcher for short strings (10–12 words).
5
+
6
+ Algorithm: Weighted ensemble of three independent signals
7
+ 1. Lexical Jaccard β€” lemmatized token overlap (weight: 0.20)
8
+ 2. Synonym Jaccard β€” WordNet-expanded token overlap (weight: 0.25)
9
+ 3. Semantic Cosine β€” sentence-transformers embedding similarity (weight: 0.55)
10
+
11
+ All three layers are fully deterministic: same inputs β†’ same score, always.
12
+
13
+ Required NLTK data (download once before first use):
14
+ python -m nltk.downloader wordnet omw-1.4 stopwords punkt punkt_tab averaged_perceptron_tagger_eng
15
+ """
16
+
17
+ import re
18
+ import string
19
+ from functools import lru_cache
20
+
21
+ import nltk
22
+ import numpy as np
23
+ from nltk.corpus import wordnet, stopwords
24
+ from nltk.stem import WordNetLemmatizer
25
+ from sentence_transformers import SentenceTransformer
26
+
27
# ── Config ────────────────────────────────────────────────────────────────────

# Ensemble weights for the three similarity layers.
# Must sum to 1.0 — SemanticMatcher.__init__ checks this invariant.
WEIGHTS = {
    "lexical": 0.20,   # Plain lemma overlap
    "synonym": 0.25,   # WordNet-expanded overlap
    "semantic": 0.55,  # Embedding cosine similarity
}

MATCH_THRESHOLD = 0.72   # Score β‰₯ this β†’ strings "mean the same thing"
STRONG_THRESHOLD = 0.88  # Score β‰₯ this β†’ high-confidence match

# Embedding model: deterministic, no sampling
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# ── Lazy singletons ───────────────────────────────────────────────────────────
# Shared, lazily created resources; populated on first use by the _get_*
# helpers below so importing this module stays cheap.

_model: SentenceTransformer | None = None
_lemmatizer: WordNetLemmatizer | None = None
_stop_words: set[str] | None = None
46
+
47
+
48
def _get_model() -> SentenceTransformer:
    """Return the process-wide embedding model, constructing it on first use."""
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _model
53
+
54
+
55
def _get_lemmatizer() -> WordNetLemmatizer:
    """Return the shared WordNet lemmatizer, constructing it on first use."""
    global _lemmatizer
    if _lemmatizer is not None:
        return _lemmatizer
    _lemmatizer = WordNetLemmatizer()
    return _lemmatizer
60
+
61
+
62
def _get_stopwords() -> set[str]:
    """Return the cached English stopword set, loading it on first use."""
    global _stop_words
    if _stop_words is not None:
        return _stop_words
    _stop_words = set(stopwords.words("english"))
    return _stop_words
67
+
68
+
69
+ # ── Text preprocessing ────────────────────────────────────────────────────────
70
+
71
def _get_wordnet_pos(treebank_tag: str) -> str:
    """Translate a Penn Treebank POS tag prefix into the WordNet POS constant.

    Unrecognized (or empty) tags default to NOUN, matching WordNet's own
    lemmatizer default.
    """
    prefix_to_pos = {
        "J": wordnet.ADJ,
        "V": wordnet.VERB,
        "R": wordnet.ADV,
    }
    return prefix_to_pos.get(treebank_tag[:1], wordnet.NOUN)
80
+
81
+
82
def normalize(text: str) -> str:
    """Return *text* lowercased, with all punctuation stripped and runs of
    whitespace collapsed to single spaces (leading/trailing removed)."""
    depunctuated = text.lower().translate(str.maketrans("", "", string.punctuation))
    # str.split() splits on any whitespace run and drops empties, so joining
    # with a single space both collapses and trims in one pass.
    return " ".join(depunctuated.split())
88
+
89
+
90
def tokenize_and_lemmatize(text: str) -> list[str]:
    """Tokenize *text*, POS-tag it, lemmatize each token with its tag, and
    drop stopwords and non-alphabetic tokens."""
    lemmatizer = _get_lemmatizer()
    stop_words = _get_stopwords()

    # Tag the full token stream first — POS tags depend on sentence context,
    # so filtering happens only afterwards.
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(normalize(text)))

    lemmas: list[str] = []
    for word, tag in tagged_tokens:
        if word in stop_words or not word.isalpha():
            continue
        lemmas.append(lemmatizer.lemmatize(word, _get_wordnet_pos(tag)))
    return lemmas
104
+
105
+
106
+ # ── WordNet synonym expansion ─────────────────────────────────────────────────
107
+
108
@lru_cache(maxsize=512)
def _synonyms(word: str) -> frozenset[str]:
    """Return every WordNet lemma name for *word* plus the word itself.

    Results are cached (deterministic, bounded) since the same short tokens
    recur across comparisons.
    """
    names = {
        lemma.name().replace("_", " ").lower()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()  # type: ignore
    }
    names.add(word)
    return frozenset(names)
116
+
117
+
118
def expand_with_synonyms(tokens: list[str]) -> set[str]:
    """Return the union of the WordNet synonym sets of every token."""
    # set().union(*...) handles the empty token list gracefully (empty set).
    return set().union(*map(_synonyms, tokens))
124
+
125
+
126
+ # ── Similarity metrics ────────────────────────────────────────────────────────
127
+
128
def jaccard(set_a: set[str], set_b: set[str]) -> float:
    """Jaccard similarity: |A ∩ B| / |A βˆͺ B|.

    Two empty sets are defined as identical (1.0) to avoid division by zero.
    """
    union_size = len(set_a | set_b)
    if union_size == 0:
        return 1.0
    return len(set_a & set_b) / union_size
135
+
136
+
137
def cosine_similarity(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
    """Cosine similarity of two vectors; 0.0 when either has zero norm."""
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    # A zero vector has no direction: define its similarity as 0 rather
    # than dividing by zero.
    if norm_a == 0 or norm_b == 0:
        return 0.0
    similarity = np.dot(vec_a, vec_b) / (norm_a * norm_b)
    return float(similarity)
144
+
145
+
146
+ # ── Core matcher ──────────────────────────────────────────────────────────────
147
+
148
class SemanticMatcher:
    """
    Deterministic semantic matcher for short strings.

    Blends three independent signals — lexical Jaccard, WordNet-synonym
    Jaccard, and embedding cosine similarity — into one weighted score
    in [0.0, 1.0].

    Usage:
        matcher = SemanticMatcher()
        result = matcher.match("The cat sat on the mat",
                               "A cat was sitting on the mat")
        print(result)
    """

    def __init__(
        self,
        match_threshold: float = MATCH_THRESHOLD,
        strong_threshold: float = STRONG_THRESHOLD,
        weights: dict[str, float] | None = None,
    ):
        """
        Args:
            match_threshold: minimum ensemble score counted as a match.
            strong_threshold: minimum ensemble score counted as high-confidence.
            weights: optional per-layer weights (keys "lexical", "synonym",
                "semantic"); must sum to 1.0. Defaults to module-level WEIGHTS.

        Raises:
            ValueError: if the supplied weights do not sum to 1.0.
        """
        self.match_threshold = match_threshold
        self.strong_threshold = strong_threshold
        self.weights = weights or WEIGHTS
        # Confidence band of the most recent matchscore() call.
        # NOTE(review): per-call mutable state — one instance is not safe to
        # share across threads.
        self.confidence_level: str = "no_match"

        total = sum(self.weights.values())
        # Raise instead of assert: asserts are stripped under `python -O`.
        if abs(total - 1.0) >= 1e-6:
            raise ValueError(f"Weights must sum to 1.0 (got {total:.4f})")

    # ── Inner Functions ────────────────────────────────────────────────────

    def _layer_lexical(self, tokens_a: list[str], tokens_b: list[str]) -> float:
        """Plain lemma-set overlap."""
        return jaccard(set(tokens_a), set(tokens_b))

    def _layer_synonym(self, tokens_a: list[str], tokens_b: list[str]) -> float:
        """Lemma-set overlap after WordNet synonym expansion."""
        expanded_a = expand_with_synonyms(tokens_a)
        expanded_b = expand_with_synonyms(tokens_b)
        return jaccard(expanded_a, expanded_b)

    def _layer_semantic(self, text_a: str, text_b: str) -> float:
        """Embedding cosine similarity of the two normalized texts."""
        model = _get_model()
        # encode() is deterministic: no sampling, fixed weights
        embeddings = model.encode(
            [normalize(text_a), normalize(text_b)],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        return cosine_similarity(embeddings[0], embeddings[1])  # type: ignore

    # ── Public Functions ────────────────────────────────────────────────────

    def matchscore(self, text_a: str, text_b: str) -> float:
        """
        Compare two strings and return a similarity score.

        Returns a float between 0.0 and 1.0, where 1.0 indicates a perfect
        match. Also records the confidence band of this comparison, readable
        afterwards via confidence().
        """
        # Fast-path: normalized exact match is a perfect, high-confidence hit.
        # (Bug fix: this previously returned the bool True — violating the
        # declared float contract — and left confidence_level stale from the
        # previous call.)
        if normalize(text_a) == normalize(text_b):
            self.confidence_level = "strong"
            return 1.0

        tokens_a = tokenize_and_lemmatize(text_a)
        tokens_b = tokenize_and_lemmatize(text_b)

        layer_scores = {
            "lexical": self._layer_lexical(tokens_a, tokens_b),
            "synonym": self._layer_synonym(tokens_a, tokens_b),
            "semantic": self._layer_semantic(text_a, text_b),
        }

        score = sum(self.weights[k] * v for k, v in layer_scores.items())
        if score >= self.strong_threshold:
            self.confidence_level = "strong"
        elif score >= self.match_threshold:
            self.confidence_level = "moderate"
        else:
            self.confidence_level = "no_match"
        return score

    def match(self, text_a: str, text_b: str) -> bool:
        """Return True if the two texts are considered a match based on the score."""
        score = self.matchscore(text_a, text_b)
        return score >= self.match_threshold

    def confidence(self) -> str:
        """Return the confidence band of the last matchscore()/match() call:
        'strong', 'moderate', or 'no_match'."""
        return self.confidence_level