Spaces:
Running
Running
ajaxwin committed on
Commit ·
8fccda7
1
Parent(s): 08c19c7
New matching logic for grader
Browse files- data/data_loader.py +7 -1
- data/vulnerabilities.json +192 -0
- data/vulnerabilities.md +31 -0
- eval.py +1 -0
- tasks/task1/environment.py +3 -3
- tasks/task1/grader.py +9 -69
- utils/__init__.py +3 -0
- utils/matcher.py +29 -0
data/data_loader.py
CHANGED
|
@@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
| 14 |
|
| 15 |
DATA_DIR = os.path.join(os.path.dirname(__file__))
|
| 16 |
DEFAULT_CONTRACTS_FILE = os.path.join(DATA_DIR, "contracts.json")
|
| 17 |
-
|
| 18 |
|
| 19 |
def load_contracts(path: str = DEFAULT_CONTRACTS_FILE) -> List[Dict[str, Any]]:
|
| 20 |
"""Load and return all contracts from the JSON dataset."""
|
|
@@ -22,6 +22,12 @@ def load_contracts(path: str = DEFAULT_CONTRACTS_FILE) -> List[Dict[str, Any]]:
|
|
| 22 |
return json.load(f)
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def get_all_vulnerable_entries(
|
| 26 |
contracts: List[Dict[str, Any]],
|
| 27 |
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
|
|
|
|
| 14 |
|
| 15 |
DATA_DIR = os.path.join(os.path.dirname(__file__))
|
| 16 |
DEFAULT_CONTRACTS_FILE = os.path.join(DATA_DIR, "contracts.json")
|
| 17 |
+
DEFAULT_VUNERABILITIES_FILE = os.path.join(DATA_DIR, "vulnerabilities.json")
|
| 18 |
|
| 19 |
def load_contracts(path: str = DEFAULT_CONTRACTS_FILE) -> List[Dict[str, Any]]:
|
| 20 |
"""Load and return all contracts from the JSON dataset."""
|
|
|
|
| 22 |
return json.load(f)
|
| 23 |
|
| 24 |
|
| 25 |
+
def load_vulnerabilities(path: str = DEFAULT_VUNERABILITIES_FILE) -> List[Dict[str, Any]]:
    """Load and return all vulnerability entries from the JSON dataset.

    Args:
        path: Location of the JSON file to read. Defaults to the
            ``vulnerabilities.json`` file that sits next to this module.

    Returns:
        The parsed JSON document. In the bundled dataset this is a list of
        objects, each with a ``"vulnerability"`` name and a ``"terms"`` list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale-specific default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
def get_all_vulnerable_entries(
|
| 32 |
contracts: List[Dict[str, Any]],
|
| 33 |
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
|
data/vulnerabilities.json
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"vulnerability": "Non-compliance with EIP4626 standard - previewDeposit",
|
| 4 |
+
"terms": [
|
| 5 |
+
"previewdeposit non-compliance",
|
| 6 |
+
"eip4626 previewdeposit issue",
|
| 7 |
+
"previewdeposit violation",
|
| 8 |
+
"previewdeposit standard break",
|
| 9 |
+
"previewdeposit limitation include",
|
| 10 |
+
"eip-4626 previewdeposit violation",
|
| 11 |
+
"previewdeposit spec deviation",
|
| 12 |
+
"previewdeposit max limitation error",
|
| 13 |
+
"4626 compliance failure (previewdeposit)",
|
| 14 |
+
"eip4626",
|
| 15 |
+
"previewdeposit",
|
| 16 |
+
"compliance",
|
| 17 |
+
"standard violation",
|
| 18 |
+
"maxdeposit",
|
| 19 |
+
"limitation"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"vulnerability": "Additive burn (rounding vulnerability)",
|
| 24 |
+
"terms": [
|
| 25 |
+
"additive burn",
|
| 26 |
+
"rounding burn",
|
| 27 |
+
"burn rounding",
|
| 28 |
+
"withdraw rounding zero burn",
|
| 29 |
+
"atoken burn rounding",
|
| 30 |
+
"rounding burn attack",
|
| 31 |
+
"additive burn rounding",
|
| 32 |
+
"withdraw rounding to zero",
|
| 33 |
+
"atoken conversion rounding",
|
| 34 |
+
"burn rounding vulnerability",
|
| 35 |
+
"rounding",
|
| 36 |
+
"burn",
|
| 37 |
+
"atoken",
|
| 38 |
+
"withdraw",
|
| 39 |
+
"precision loss",
|
| 40 |
+
"conversion rate",
|
| 41 |
+
"zero burn"
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"vulnerability": "Additive mint (Stable debt token)",
|
| 46 |
+
"terms": [
|
| 47 |
+
"additive mint",
|
| 48 |
+
"stable debt rounding",
|
| 49 |
+
"mint rounding vulnerability",
|
| 50 |
+
"debt token rounding",
|
| 51 |
+
"deposit rounding mint",
|
| 52 |
+
"rounding mint attack",
|
| 53 |
+
"additive mint rounding",
|
| 54 |
+
"stable debt token rounding",
|
| 55 |
+
"deposit rounding mint",
|
| 56 |
+
"debt token inflation",
|
| 57 |
+
"rounding",
|
| 58 |
+
"mint",
|
| 59 |
+
"stable debt",
|
| 60 |
+
"deposit",
|
| 61 |
+
"precision loss",
|
| 62 |
+
"debt token",
|
| 63 |
+
"inflation"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"vulnerability": "Non-compliance with EIP4626 standard - previewMint",
|
| 68 |
+
"terms": [
|
| 69 |
+
"previewmint non-compliance",
|
| 70 |
+
"eip4626 previewmint issue",
|
| 71 |
+
"previewmint violation",
|
| 72 |
+
"previewmint standard break",
|
| 73 |
+
"eip-4626 previewmint violation",
|
| 74 |
+
"previewmint spec deviation",
|
| 75 |
+
"previewmint max limitation error",
|
| 76 |
+
"4626 compliance failure (previewmint)",
|
| 77 |
+
"eip4626",
|
| 78 |
+
"previewmint",
|
| 79 |
+
"compliance",
|
| 80 |
+
"standard violation",
|
| 81 |
+
"maxmint",
|
| 82 |
+
"limitation"
|
| 83 |
+
]
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"vulnerability": "Non-compliance with EIP4626 standard - previewWithdraw",
|
| 87 |
+
"terms": [
|
| 88 |
+
"previewwithdraw non-compliance",
|
| 89 |
+
"eip4626 previewwithdraw issue",
|
| 90 |
+
"previewwithdraw violation",
|
| 91 |
+
"previewwithdraw standard break",
|
| 92 |
+
"eip-4626 previewwithdraw violation",
|
| 93 |
+
"previewwithdraw spec deviation",
|
| 94 |
+
"previewwithdraw max limitation error",
|
| 95 |
+
"4626 compliance failure (previewwithdraw)",
|
| 96 |
+
"eip4626",
|
| 97 |
+
"previewwithdraw",
|
| 98 |
+
"compliance",
|
| 99 |
+
"standard violation",
|
| 100 |
+
"maxwithdraw",
|
| 101 |
+
"limitation"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"vulnerability": "Non-compliance with EIP4626 standard - previewRedeem",
|
| 106 |
+
"terms": [
|
| 107 |
+
"previewredeem non-compliance",
|
| 108 |
+
"eip4626 previewredeem issue",
|
| 109 |
+
"previewredeem violation",
|
| 110 |
+
"previewredeem standard break",
|
| 111 |
+
"eip-4626 previewredeem violation",
|
| 112 |
+
"previewredeem spec deviation",
|
| 113 |
+
"previewredeem max limitation error",
|
| 114 |
+
"4626 compliance failure (previewredeem)",
|
| 115 |
+
"eip4626",
|
| 116 |
+
"previewredeem",
|
| 117 |
+
"compliance",
|
| 118 |
+
"standard violation",
|
| 119 |
+
"maxredeem",
|
| 120 |
+
"limitation"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"vulnerability": "Non-compliance with EIP4626 standard - non-reverting functions",
|
| 125 |
+
"terms": [
|
| 126 |
+
"non-reverting functions revert",
|
| 127 |
+
"eip4626 revert issue",
|
| 128 |
+
"totalassets revert",
|
| 129 |
+
"max functions revert",
|
| 130 |
+
"arithmetic revert vulnerability",
|
| 131 |
+
"eip-4626 reverting view functions",
|
| 132 |
+
"totalassets revert",
|
| 133 |
+
"max functions revert",
|
| 134 |
+
"arithmetic revert in 4626",
|
| 135 |
+
"view function revert violation",
|
| 136 |
+
"eip4626",
|
| 137 |
+
"revert",
|
| 138 |
+
"totalassets",
|
| 139 |
+
"maxdeposit",
|
| 140 |
+
"maxmint",
|
| 141 |
+
"maxwithdraw",
|
| 142 |
+
"maxredeem",
|
| 143 |
+
"arithmetic underflow",
|
| 144 |
+
"overflow"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"vulnerability": "Discount Factor Issue (Unfair finalization within a batch)",
|
| 149 |
+
"terms": [
|
| 150 |
+
"discount factor unfair",
|
| 151 |
+
"batch finalization unfair",
|
| 152 |
+
"fifo violation",
|
| 153 |
+
"discount factor flaw",
|
| 154 |
+
"unfair batch finalization",
|
| 155 |
+
"fifo fairness violation",
|
| 156 |
+
"batch finalization unfairness",
|
| 157 |
+
"discount factor miscalculation",
|
| 158 |
+
"queue finalization manipulation",
|
| 159 |
+
"unfair slashing distribution",
|
| 160 |
+
"discount factor",
|
| 161 |
+
"finalization",
|
| 162 |
+
"batch",
|
| 163 |
+
"fifo",
|
| 164 |
+
"fairness",
|
| 165 |
+
"queue",
|
| 166 |
+
"slashing",
|
| 167 |
+
"share rate"
|
| 168 |
+
]
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"vulnerability": "Potential for incorrect ETH transfer due to price calculation",
|
| 172 |
+
"terms": [
|
| 173 |
+
"incorrect eth transfer",
|
| 174 |
+
"price calculation error",
|
| 175 |
+
"claim eth wrong",
|
| 176 |
+
"discounted batch error",
|
| 177 |
+
"eth transfer miscalculation",
|
| 178 |
+
"claim amount miscalculation",
|
| 179 |
+
"eth transfer error",
|
| 180 |
+
"price calculation flaw",
|
| 181 |
+
"discounted batch eth error",
|
| 182 |
+
"incorrect eth withdrawal",
|
| 183 |
+
"eth transfer",
|
| 184 |
+
"claim",
|
| 185 |
+
"price calculation",
|
| 186 |
+
"discounted batch",
|
| 187 |
+
"finalization",
|
| 188 |
+
"incorrect amount",
|
| 189 |
+
"fund loss"
|
| 190 |
+
]
|
| 191 |
+
}
|
| 192 |
+
]
|
data/vulnerabilities.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Each entry consists of two lines: the first line is the issue title and the second line is the issue description. Entries are separated by a blank line, and the pattern repeats.
|
| 2 |
+
|
| 3 |
+
Non-compliance with EIP4626 standard - previewDeposit
|
| 4 |
+
As per EIP4626, all the preview functions must not take into account any limitation of the system, like those returned by the max() methods. In the contract, the preview methods do take into account system limitations.
|
| 5 |
+
|
| 6 |
+
Additive burn (rounding vulnerability)
|
| 7 |
+
Due to rounding in conversions to AToken, if the conversion rate is high enough, one can withdraw a small amount that will result in the system transferring underlying tokens but burning zero ATokens of the user's account
|
| 8 |
+
|
| 9 |
+
Additive mint (Stable debt token)
|
| 10 |
+
Due to rounding in conversions to stable debt token, if the conversion rate is high enough, one can deposit a small amount that will result in the system transferring underlying tokens but minting debt tokens to the user's account
|
| 11 |
+
|
| 12 |
+
Non-compliance with EIP4626 standard - previewDeposit
|
| 13 |
+
As per EIP4626, all the preview functions must not take into account any limitation of the system, like those returned by the max() methods. In the contract, the preview methods do take into account system limitations.
|
| 14 |
+
|
| 15 |
+
Non-compliance with EIP4626 standard - previewMint
|
| 16 |
+
As per EIP4626, all the preview functions must not take into account any limitation of the system, like those returned by the max() methods. In the contract, the preview methods do take into account system limitations.
|
| 17 |
+
|
| 18 |
+
Non-compliance with EIP4626 standard - previewWithdraw
|
| 19 |
+
As per EIP4626, all the preview functions must not take into account any limitation of the system, like those returned by the max() methods. In the contract, the preview methods do take into account system limitations
|
| 20 |
+
|
| 21 |
+
Non-compliance with EIP4626 standard - previewRedeem
|
| 22 |
+
As per EIP4626, all the preview functions must not take into account any limitation of the system, like those returned by the max() methods. In the contract, the preview methods do take into account system limitations
|
| 23 |
+
|
| 24 |
+
Non-compliance with EIP4626 standard - non-reverting functions
|
| 25 |
+
As per EIP4626, the functions totalAssets, maxDeposit, maxMint, maxWithdraw, and maxRedeem must not revert by any means. In the contract, however, these functions may revert due to arithmetic overflows or underflows.
|
| 26 |
+
|
| 27 |
+
Discount Factor Issue (Unfair finalization within a batch)
|
| 28 |
+
If two users in the same finalization batch entered the queue at different share rates (e.g., before and after a slashing event), the finalization logic weights the amount incorrectly. This can lead to one user losing ETH at the expense of another user within the same batch, which violates the FIFO principle of fairness.
|
| 29 |
+
|
| 30 |
+
Potential for incorrect ETH transfer due to price calculation
|
| 31 |
+
The `claim` function relies on the `_calculateDiscountedBatch` function, which was part of the original discount factor logic. The flaw in the `finalize` function's discount factor calculation directly impacts the amount of ETH that a user can claim. If the finalization was processed with a flawed discount factor, the user will claim an incorrect amount of ETH, either losing funds or gaining more than they are entitled to.
|
eval.py
CHANGED
|
@@ -82,6 +82,7 @@ def oracle_agent(env: Task1Environment, seed: int, verbose: bool = False) -> Dic
|
|
| 82 |
for contract in contracts:
|
| 83 |
for fn in contract.get("functions", []):
|
| 84 |
if fn["name"].lower() == target_fn.lower() and fn.get("vulnerable"):
|
|
|
|
| 85 |
vuln_issue = fn["vulnerability_details"]["issue"]
|
| 86 |
break
|
| 87 |
if vuln_issue:
|
|
|
|
| 82 |
for contract in contracts:
|
| 83 |
for fn in contract.get("functions", []):
|
| 84 |
if fn["name"].lower() == target_fn.lower() and fn.get("vulnerable"):
|
| 85 |
+
# NOTE: the matcher assumes the expected string is only 2-3 words long, so the full issue text used here does not match well.
|
| 86 |
vuln_issue = fn["vulnerability_details"]["issue"]
|
| 87 |
break
|
| 88 |
if vuln_issue:
|
tasks/task1/environment.py
CHANGED
|
@@ -296,8 +296,8 @@ class Task1Environment(BaseEnv):
|
|
| 296 |
"Submit requires 'function_name' and 'vulnerability_type' in params.",
|
| 297 |
Reward(value=-0.5, reason="Malformed submission", partial=True),
|
| 298 |
)
|
| 299 |
-
score = self._grader.grade_submission(fn_name, vuln_type)
|
| 300 |
-
reward_val = self._grader.reward_for_score(score)
|
| 301 |
self._done = True
|
| 302 |
|
| 303 |
if score == 1.0:
|
|
@@ -311,7 +311,7 @@ class Task1Environment(BaseEnv):
|
|
| 311 |
f"'{vuln_type}' was not precise. Score: 0.5"
|
| 312 |
)
|
| 313 |
else:
|
| 314 |
-
correct = self._grader.get_canonical_answer()
|
| 315 |
msg = (
|
| 316 |
f"❌ INCORRECT. '{fn_name}' is not the target vulnerable function. "
|
| 317 |
f"Correct answer: {correct['function']} ({correct['vulnerability']}). Score: 0.0"
|
|
|
|
| 296 |
"Submit requires 'function_name' and 'vulnerability_type' in params.",
|
| 297 |
Reward(value=-0.5, reason="Malformed submission", partial=True),
|
| 298 |
)
|
| 299 |
+
score = self._grader.grade_submission(fn_name, vuln_type) # type: ignore
|
| 300 |
+
reward_val = self._grader.reward_for_score(score) # type: ignore
|
| 301 |
self._done = True
|
| 302 |
|
| 303 |
if score == 1.0:
|
|
|
|
| 311 |
f"'{vuln_type}' was not precise. Score: 0.5"
|
| 312 |
)
|
| 313 |
else:
|
| 314 |
+
correct = self._grader.get_canonical_answer() # type: ignore
|
| 315 |
msg = (
|
| 316 |
f"❌ INCORRECT. '{fn_name}' is not the target vulnerable function. "
|
| 317 |
f"Correct answer: {correct['function']} ({correct['vulnerability']}). Score: 0.0"
|
tasks/task1/grader.py
CHANGED
|
@@ -9,75 +9,15 @@ Deterministic grader. Score range: 0.0 – 1.0
|
|
| 9 |
"""
|
| 10 |
from __future__ import annotations
|
| 11 |
from typing import Dict, List, Optional
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
"
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
"access control", "missing access", "no access", "unauthorized",
|
| 20 |
-
"privilege", "permission", "onlyowner", "only owner",
|
| 21 |
-
"no modifier", "missing modifier", "no check", "anyone can call",
|
| 22 |
-
],
|
| 23 |
-
"integer overflow": [
|
| 24 |
-
"overflow", "integer overflow", "arithmetic overflow",
|
| 25 |
-
"safemath", "safe math", "uint overflow", "wraparound",
|
| 26 |
-
"integer underflow", "underflow",
|
| 27 |
-
],
|
| 28 |
-
"tx.origin authentication": [
|
| 29 |
-
"tx.origin", "txorigin", "tx origin", "phishing",
|
| 30 |
-
"origin authentication", "origin auth",
|
| 31 |
-
],
|
| 32 |
-
"front-running": [
|
| 33 |
-
"front-running", "frontrunning", "front running", "mev",
|
| 34 |
-
"sandwich", "mempool", "commit reveal", "commit-reveal",
|
| 35 |
-
"gas price manipulation",
|
| 36 |
-
],
|
| 37 |
-
"timestamp dependence": [
|
| 38 |
-
"timestamp", "block.timestamp", "time manipulation",
|
| 39 |
-
"miner timestamp", "time dependency", "timestamp dependence",
|
| 40 |
-
],
|
| 41 |
-
"denial of service": [
|
| 42 |
-
"denial of service", " dos", "gas limit", "unbounded loop",
|
| 43 |
-
"block gas", " oog", "out of gas", "infinite loop", "unbounded array",
|
| 44 |
-
"gas exhaustion",
|
| 45 |
-
],
|
| 46 |
-
"unchecked return value": [
|
| 47 |
-
"unchecked return", "return value", "unchecked transfer",
|
| 48 |
-
"silent failure", "safeerc20", "safe transfer", "ignored return",
|
| 49 |
-
"erc20 return",
|
| 50 |
-
],
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
def _norm(text: str) -> str:
|
| 55 |
-
return text.strip().lower()
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def _find_bucket(ground_truth_issue: str) -> Optional[str]:
|
| 59 |
-
"""
|
| 60 |
-
Longest-match keyword search to identify canonical vulnerability bucket.
|
| 61 |
-
Longest match avoids short-keyword collisions (e.g. 'auth' in 'tx.origin authentication').
|
| 62 |
-
"""
|
| 63 |
-
norm_gt = _norm(ground_truth_issue)
|
| 64 |
-
best: Optional[str] = None
|
| 65 |
-
best_len: int = 0
|
| 66 |
-
for canonical, keywords in VULN_KEYWORDS.items():
|
| 67 |
-
for kw in keywords:
|
| 68 |
-
if kw in norm_gt and len(kw) > best_len:
|
| 69 |
-
best_len = len(kw)
|
| 70 |
-
best = canonical
|
| 71 |
-
return best
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
def match_vuln_keyword(submitted: str, ground_truth_issue: str) -> bool:
|
| 75 |
-
bucket = _find_bucket(ground_truth_issue)
|
| 76 |
-
if bucket is None:
|
| 77 |
-
return _norm(submitted) in _norm(ground_truth_issue)
|
| 78 |
-
norm_sub = _norm(submitted)
|
| 79 |
-
return any(kw in norm_sub for kw in VULN_KEYWORDS[bucket])
|
| 80 |
-
|
| 81 |
|
| 82 |
class Task1Grader:
|
| 83 |
def __init__(self, target_function: str, vulnerability_issue: str) -> None:
|
|
@@ -87,7 +27,7 @@ class Task1Grader:
|
|
| 87 |
def grade_submission(self, submitted_function: str, submitted_vuln_type: str) -> float:
|
| 88 |
if submitted_function.strip().lower() != self.target_function:
|
| 89 |
return 0.0
|
| 90 |
-
return 1.0 if
|
| 91 |
|
| 92 |
def reward_for_score(self, score: float) -> float:
|
| 93 |
if score == 1.0: return 5.0
|
|
|
|
| 9 |
"""
|
| 10 |
from __future__ import annotations
|
| 11 |
from typing import Dict, List, Optional
|
| 12 |
+
from utils.matcher import match_strings
|
| 13 |
+
from data.data_loader import load_vulnerabilities
|
| 14 |
|
| 15 |
+
def match_vuln_keywords(submitted: str, expected: str) -> bool:
    """Report whether a submitted vulnerability type matches the expected issue.

    The vulnerability catalogue is loaded and searched for the first entry
    whose ``"vulnerability"`` name equals ``expected`` exactly. The submission
    is then compared against that entry's ``"terms"`` using ``match_strings``.

    Args:
        submitted: Vulnerability type supplied by the caller.
        expected: Name of the vulnerability to look up in the catalogue.

    Returns:
        The ``match_strings`` verdict for the matching entry, or ``False``
        when no catalogue entry is named ``expected``.
    """
    entry = next(
        (item for item in load_vulnerabilities() if item["vulnerability"] == expected),
        None,
    )
    if entry is None:
        # No catalogue entry carries this exact name, so nothing can match.
        return False
    return match_strings(entry["terms"], submitted)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
class Task1Grader:
|
| 23 |
def __init__(self, target_function: str, vulnerability_issue: str) -> None:
|
|
|
|
| 27 |
def grade_submission(self, submitted_function: str, submitted_vuln_type: str) -> float:
    """Score a submission against the target function and vulnerability.

    Args:
        submitted_function: Function name supplied by the caller; surrounding
            whitespace and letter case are ignored.
        submitted_vuln_type: Vulnerability type supplied by the caller.

    Returns:
        ``0.0`` when the function name differs from ``self.target_function``,
        ``1.0`` when the function matches and the vulnerability type matches
        ``self.vulnerability_issue``, and ``0.5`` when only the function
        matches.
    """
    # NOTE(review): self.target_function is compared as stored; presumably it
    # is normalised the same way when the grader is constructed - confirm.
    normalized_name = submitted_function.strip().lower()
    if normalized_name != self.target_function:
        return 0.0
    if match_vuln_keywords(submitted_vuln_type, self.vulnerability_issue):
        return 1.0
    return 0.5
|
| 31 |
|
| 32 |
def reward_for_score(self, score: float) -> float:
|
| 33 |
if score == 1.0: return 5.0
|
utils/__init__.py
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
# utils package
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# utils package
|
| 2 |
+
from utils.matcher import match_strings
|
| 3 |
+
|
| 4 |
+
# Python only honors the double-underscore spelling when resolving
# `from utils import *`, so the export list must be named exactly `__all__`.
__all__ = ["match_strings"]
|
utils/matcher.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
|
| 3 |
+
def match_strings(terms: List[str], phrase: str, threshold: float = 0.6) -> bool:
    """Decide whether a phrase is similar enough to any term in a list.

    Similarity is the Jaccard index of the whitespace-separated token sets:
    the number of shared tokens divided by the number of distinct tokens
    across both strings. Both the phrase and every term are lowercased before
    tokenising, so the comparison is case-insensitive on both sides.

    Args:
        terms: Candidate term strings to compare against.
        phrase: Input phrase (intended for short, 2-3 word inputs).
        threshold: Minimum Jaccard similarity to count as a match
            (default 0.6).

    Returns:
        True if at least one term reaches ``threshold``; False otherwise,
        including when ``phrase`` or ``terms`` is empty.
    """
    # split() with no argument already discards leading/trailing whitespace.
    phrase_tokens = set(phrase.lower().split())
    if not phrase_tokens:
        # An empty phrase can never match; decide once instead of per term.
        return False

    for term in terms:
        term_tokens = set(term.lower().split())
        if not term_tokens:
            continue
        shared = len(phrase_tokens & term_tokens)
        combined = len(phrase_tokens | term_tokens)
        if shared / combined >= threshold:
            return True
    return False
|