Spaces:
Paused
Paused
rb125 committed on
Commit ·
32faa06
1
Parent(s): 79d69d4
cleaning up simulation data, moving all transactions on-chain
Browse files
- cgae_engine/audit.py +16 -22
- cgae_engine/economy.py +54 -3
- cgae_engine/onchain.py +163 -3
- cgae_engine/tasks.py +36 -36
- dashboard-next/app/page.tsx +3 -3
- dashboard-next/next-env.d.ts +1 -1
- scripts/video_demo.py +291 -110
- server/api.py +1 -0
- server/live_runner.py +26 -22
- storage/zg_store.py +4 -2
cgae_engine/audit.py
CHANGED
|
@@ -330,7 +330,7 @@ def _pin_audit_to_0g(
|
|
| 330 |
_sys.path.insert(0, _root)
|
| 331 |
from storage.zg_store import ZgStore # type: ignore
|
| 332 |
|
| 333 |
-
store = ZgStore()
|
| 334 |
result = store.store_audit_result(model_name, cert_path)
|
| 335 |
|
| 336 |
cert["storage_root_hash"] = result.root_hash
|
|
@@ -338,21 +338,15 @@ def _pin_audit_to_0g(
|
|
| 338 |
if cert_path:
|
| 339 |
cert_path.write_text(json.dumps(cert, indent=2))
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
)
|
| 345 |
-
else:
|
| 346 |
-
logger.warning(
|
| 347 |
-
f" [0g] Fallback hash for {model_name}: {result.root_hash} "
|
| 348 |
-
f"(reason: {result.error})"
|
| 349 |
-
)
|
| 350 |
|
| 351 |
return result.root_hash, result.real
|
| 352 |
|
| 353 |
except Exception as e:
|
| 354 |
-
logger.
|
| 355 |
-
|
| 356 |
|
| 357 |
|
| 358 |
class AuditOrchestrator:
|
|
@@ -393,8 +387,8 @@ class AuditOrchestrator:
|
|
| 393 |
|
| 394 |
def audit_from_results(self, agent_id: str, model_name: str) -> AuditResult:
|
| 395 |
"""
|
| 396 |
-
Compute robustness vector
|
| 397 |
-
|
| 398 |
|
| 399 |
``defaults_used`` on the returned result lists any dimensions where no
|
| 400 |
real framework data was found and the 0.5 / 0.7 midpoint was substituted.
|
|
@@ -420,7 +414,7 @@ class AuditOrchestrator:
|
|
| 420 |
robustness=robustness,
|
| 421 |
details={
|
| 422 |
"cc": cc, "er": er, "as": as_, "ih": ih,
|
| 423 |
-
"source": "
|
| 424 |
"defaults_used": sorted(defaults_used),
|
| 425 |
},
|
| 426 |
defaults_used=defaults_used,
|
|
@@ -471,11 +465,11 @@ class AuditOrchestrator:
|
|
| 471 |
if cris:
|
| 472 |
cc = min(cris)
|
| 473 |
if cc is not None and cc > 0:
|
| 474 |
-
logger.info(f" [
|
| 475 |
return cc, False
|
| 476 |
except Exception:
|
| 477 |
pass
|
| 478 |
-
logger.debug(f" [
|
| 479 |
return default_cc, True
|
| 480 |
|
| 481 |
def _load_ddft_score(self, model_name: str) -> tuple[float, bool]:
|
|
@@ -489,11 +483,11 @@ class AuditOrchestrator:
|
|
| 489 |
if er_val is not None:
|
| 490 |
er = float(er_val)
|
| 491 |
if er is not None and er > 0:
|
| 492 |
-
logger.info(f" [
|
| 493 |
return er, False
|
| 494 |
except Exception:
|
| 495 |
pass
|
| 496 |
-
logger.debug(f" [
|
| 497 |
return default_er, True
|
| 498 |
|
| 499 |
def _load_eect_score(self, model_name: str) -> tuple[float, bool]:
|
|
@@ -507,11 +501,11 @@ class AuditOrchestrator:
|
|
| 507 |
if val is not None:
|
| 508 |
as_ = float(val)
|
| 509 |
if as_ is not None and as_ > 0:
|
| 510 |
-
logger.info(f" [
|
| 511 |
return as_, False
|
| 512 |
except Exception:
|
| 513 |
pass
|
| 514 |
-
logger.debug(f" [
|
| 515 |
return default_as, True
|
| 516 |
|
| 517 |
def _load_ih_score(self, model_name: str) -> tuple[float, bool]:
|
|
@@ -528,7 +522,7 @@ class AuditOrchestrator:
|
|
| 528 |
return ih, False
|
| 529 |
except Exception:
|
| 530 |
pass
|
| 531 |
-
logger.debug(f" [
|
| 532 |
return default_ih, True
|
| 533 |
|
| 534 |
@staticmethod
|
|
|
|
| 330 |
_sys.path.insert(0, _root)
|
| 331 |
from storage.zg_store import ZgStore # type: ignore
|
| 332 |
|
| 333 |
+
store = ZgStore(fallback_ok=False)
|
| 334 |
result = store.store_audit_result(model_name, cert_path)
|
| 335 |
|
| 336 |
cert["storage_root_hash"] = result.root_hash
|
|
|
|
| 338 |
if cert_path:
|
| 339 |
cert_path.write_text(json.dumps(cert, indent=2))
|
| 340 |
|
| 341 |
+
logger.info(
|
| 342 |
+
f" [0g] Audit cert pinned: {result.root_hash} (model={model_name})"
|
| 343 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
return result.root_hash, result.real
|
| 346 |
|
| 347 |
except Exception as e:
|
| 348 |
+
logger.error(f" [0g] Pin failed for {model_name}: {e}")
|
| 349 |
+
raise RuntimeError(f"0G Storage pin failed for {model_name}: {e}") from e
|
| 350 |
|
| 351 |
|
| 352 |
class AuditOrchestrator:
|
|
|
|
| 387 |
|
| 388 |
def audit_from_results(self, agent_id: str, model_name: str) -> AuditResult:
|
| 389 |
"""
|
| 390 |
+
Compute robustness vector by querying framework API endpoints.
|
| 391 |
+
Calls each hosted framework API's /score endpoint for *model_name*.
|
| 392 |
|
| 393 |
``defaults_used`` on the returned result lists any dimensions where no
|
| 394 |
real framework data was found and the 0.5 / 0.7 midpoint was substituted.
|
|
|
|
| 414 |
robustness=robustness,
|
| 415 |
details={
|
| 416 |
"cc": cc, "er": er, "as": as_, "ih": ih,
|
| 417 |
+
"source": "framework_api",
|
| 418 |
"defaults_used": sorted(defaults_used),
|
| 419 |
},
|
| 420 |
defaults_used=defaults_used,
|
|
|
|
| 465 |
if cris:
|
| 466 |
cc = min(cris)
|
| 467 |
if cc is not None and cc > 0:
|
| 468 |
+
logger.info(f" [CDCT] GET {self._cdct.base_url}/score/{model_name} -> CC={cc:.3f}")
|
| 469 |
return cc, False
|
| 470 |
except Exception:
|
| 471 |
pass
|
| 472 |
+
logger.debug(f" [CDCT] No score for {model_name}, using default CC={default_cc:.3f}")
|
| 473 |
return default_cc, True
|
| 474 |
|
| 475 |
def _load_ddft_score(self, model_name: str) -> tuple[float, bool]:
|
|
|
|
| 483 |
if er_val is not None:
|
| 484 |
er = float(er_val)
|
| 485 |
if er is not None and er > 0:
|
| 486 |
+
logger.info(f" [DDFT] GET {self._ddft.base_url}/score/{model_name} -> ER={er:.3f}")
|
| 487 |
return er, False
|
| 488 |
except Exception:
|
| 489 |
pass
|
| 490 |
+
logger.debug(f" [DDFT] No score for {model_name}, using default ER={default_er:.3f}")
|
| 491 |
return default_er, True
|
| 492 |
|
| 493 |
def _load_eect_score(self, model_name: str) -> tuple[float, bool]:
|
|
|
|
| 501 |
if val is not None:
|
| 502 |
as_ = float(val)
|
| 503 |
if as_ is not None and as_ > 0:
|
| 504 |
+
logger.info(f" [AGT] GET {self._eect.base_url}/score/{model_name} -> AS={as_:.3f}")
|
| 505 |
return as_, False
|
| 506 |
except Exception:
|
| 507 |
pass
|
| 508 |
+
logger.debug(f" [AGT] No score for {model_name}, using default AS={default_as:.3f}")
|
| 509 |
return default_as, True
|
| 510 |
|
| 511 |
def _load_ih_score(self, model_name: str) -> tuple[float, bool]:
|
|
|
|
| 522 |
return ih, False
|
| 523 |
except Exception:
|
| 524 |
pass
|
| 525 |
+
logger.debug(f" [DDFT] No IH score for {model_name}, using default IH={default_ih:.3f}")
|
| 526 |
return default_ih, True
|
| 527 |
|
| 528 |
@staticmethod
|
cgae_engine/economy.py
CHANGED
|
@@ -20,6 +20,11 @@ from cgae_engine.temporal import TemporalDecay, StochasticAuditor, AuditEvent
|
|
| 20 |
from cgae_engine.registry import AgentRegistry, AgentRecord, AgentStatus
|
| 21 |
from cgae_engine.contracts import ContractManager, CGAEContract, ContractStatus, Constraint
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
|
|
@@ -78,7 +83,7 @@ class Economy:
|
|
| 78 |
7. Economic accounting and observability
|
| 79 |
"""
|
| 80 |
|
| 81 |
-
def __init__(self, config: Optional[EconomyConfig] = None, wallet_manager=None, onchain_bridge=None, ens_manager=None):
|
| 82 |
self.config = config or EconomyConfig()
|
| 83 |
self.gate = GateFunction(
|
| 84 |
thresholds=self.config.thresholds,
|
|
@@ -92,6 +97,7 @@ class Economy:
|
|
| 92 |
self.wallet_manager = wallet_manager # Optional: real ETH wallet integration
|
| 93 |
self.onchain_bridge = onchain_bridge # Optional: write certs to CGAERegistry on-chain
|
| 94 |
self.ens_manager = ens_manager # Optional: ENS identity for agents
|
|
|
|
| 95 |
self.current_time: float = 0.0
|
| 96 |
self._snapshots: list[EconomySnapshot] = []
|
| 97 |
self._events: list[dict] = []
|
|
@@ -420,7 +426,7 @@ class Economy:
|
|
| 420 |
issuer_id: str = "system",
|
| 421 |
) -> CGAEContract:
|
| 422 |
"""Post a new contract to the marketplace."""
|
| 423 |
-
|
| 424 |
objective=objective,
|
| 425 |
constraints=constraints,
|
| 426 |
min_tier=min_tier,
|
|
@@ -433,6 +439,29 @@ class Economy:
|
|
| 433 |
timestamp=self.current_time,
|
| 434 |
)
|
| 435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
def accept_contract(self, contract_id: str, agent_id: str) -> bool:
|
| 437 |
"""
|
| 438 |
Agent accepts a contract. Enforces:
|
|
@@ -475,13 +504,23 @@ class Economy:
|
|
| 475 |
r_eff = self.decay.effective_robustness(record.current_robustness, dt)
|
| 476 |
effective_tier = self.gate.evaluate(r_eff)
|
| 477 |
|
| 478 |
-
|
| 479 |
contract_id=contract_id,
|
| 480 |
agent_id=agent_id,
|
| 481 |
agent_tier=effective_tier,
|
| 482 |
timestamp=self.current_time,
|
| 483 |
)
|
| 484 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
def complete_contract(
|
| 486 |
self,
|
| 487 |
contract_id: str,
|
|
@@ -538,6 +577,18 @@ class Economy:
|
|
| 538 |
|
| 539 |
settlement["failures"] = failures
|
| 540 |
settlement["liable_agent_id"] = liability_agent_id or agent_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
self._log("contract_settled", settlement)
|
| 542 |
return settlement
|
| 543 |
|
|
|
|
| 20 |
from cgae_engine.registry import AgentRegistry, AgentRecord, AgentStatus
|
| 21 |
from cgae_engine.contracts import ContractManager, CGAEContract, ContractStatus, Constraint
|
| 22 |
|
| 23 |
+
try:
|
| 24 |
+
from web3 import Web3
|
| 25 |
+
except ImportError:
|
| 26 |
+
Web3 = None
|
| 27 |
+
|
| 28 |
logger = logging.getLogger(__name__)
|
| 29 |
|
| 30 |
|
|
|
|
| 83 |
7. Economic accounting and observability
|
| 84 |
"""
|
| 85 |
|
| 86 |
+
def __init__(self, config: Optional[EconomyConfig] = None, wallet_manager=None, onchain_bridge=None, ens_manager=None, escrow_bridge=None):
|
| 87 |
self.config = config or EconomyConfig()
|
| 88 |
self.gate = GateFunction(
|
| 89 |
thresholds=self.config.thresholds,
|
|
|
|
| 97 |
self.wallet_manager = wallet_manager # Optional: real ETH wallet integration
|
| 98 |
self.onchain_bridge = onchain_bridge # Optional: write certs to CGAERegistry on-chain
|
| 99 |
self.ens_manager = ens_manager # Optional: ENS identity for agents
|
| 100 |
+
self.escrow_bridge = escrow_bridge # Optional: on-chain escrow settlement
|
| 101 |
self.current_time: float = 0.0
|
| 102 |
self._snapshots: list[EconomySnapshot] = []
|
| 103 |
self._events: list[dict] = []
|
|
|
|
| 426 |
issuer_id: str = "system",
|
| 427 |
) -> CGAEContract:
|
| 428 |
"""Post a new contract to the marketplace."""
|
| 429 |
+
contract = self.contracts.create_contract(
|
| 430 |
objective=objective,
|
| 431 |
constraints=constraints,
|
| 432 |
min_tier=min_tier,
|
|
|
|
| 439 |
timestamp=self.current_time,
|
| 440 |
)
|
| 441 |
|
| 442 |
+
# Create contract on-chain via CGAEEscrow
|
| 443 |
+
if self.escrow_bridge:
|
| 444 |
+
import hashlib
|
| 445 |
+
constraints_hash = Web3.keccak(text="|".join(c.name for c in constraints)) if constraints else b'\x00' * 32
|
| 446 |
+
reward_wei = int(reward * 1e18)
|
| 447 |
+
penalty_wei = int(penalty * 1e18)
|
| 448 |
+
deadline_ts = int(time.time()) + int(deadline_offset * 60)
|
| 449 |
+
result = self.escrow_bridge.create_contract(
|
| 450 |
+
objective=objective[:200],
|
| 451 |
+
constraints_hash=constraints_hash,
|
| 452 |
+
verifier_spec_hash=contract.contract_id,
|
| 453 |
+
min_tier=min_tier.value,
|
| 454 |
+
reward_wei=max(reward_wei, 1),
|
| 455 |
+
penalty_wei=max(penalty_wei, 1),
|
| 456 |
+
deadline=deadline_ts,
|
| 457 |
+
domain=domain,
|
| 458 |
+
)
|
| 459 |
+
if result:
|
| 460 |
+
contract._escrow_tx = result[0]
|
| 461 |
+
contract._escrow_id = result[1]
|
| 462 |
+
|
| 463 |
+
return contract
|
| 464 |
+
|
| 465 |
def accept_contract(self, contract_id: str, agent_id: str) -> bool:
|
| 466 |
"""
|
| 467 |
Agent accepts a contract. Enforces:
|
|
|
|
| 504 |
r_eff = self.decay.effective_robustness(record.current_robustness, dt)
|
| 505 |
effective_tier = self.gate.evaluate(r_eff)
|
| 506 |
|
| 507 |
+
accepted = self.contracts.assign_contract(
|
| 508 |
contract_id=contract_id,
|
| 509 |
agent_id=agent_id,
|
| 510 |
agent_tier=effective_tier,
|
| 511 |
timestamp=self.current_time,
|
| 512 |
)
|
| 513 |
|
| 514 |
+
# Accept on-chain via CGAEEscrow
|
| 515 |
+
if accepted and self.escrow_bridge:
|
| 516 |
+
contract = self.contracts._get_contract(contract_id)
|
| 517 |
+
escrow_id = getattr(contract, '_escrow_id', None)
|
| 518 |
+
if escrow_id:
|
| 519 |
+
penalty_wei = int(contract.penalty * 1e18)
|
| 520 |
+
self.escrow_bridge.accept_contract(escrow_id, max(penalty_wei, 1))
|
| 521 |
+
|
| 522 |
+
return accepted
|
| 523 |
+
|
| 524 |
def complete_contract(
|
| 525 |
self,
|
| 526 |
contract_id: str,
|
|
|
|
| 577 |
|
| 578 |
settlement["failures"] = failures
|
| 579 |
settlement["liable_agent_id"] = liability_agent_id or agent_id
|
| 580 |
+
|
| 581 |
+
# Settle on-chain via CGAEEscrow
|
| 582 |
+
if self.escrow_bridge:
|
| 583 |
+
contract = self.contracts._get_contract(contract_id)
|
| 584 |
+
escrow_id = getattr(contract, '_escrow_id', None)
|
| 585 |
+
if escrow_id:
|
| 586 |
+
if settlement["outcome"] == "success":
|
| 587 |
+
tx = self.escrow_bridge.complete_contract(escrow_id)
|
| 588 |
+
else:
|
| 589 |
+
tx = self.escrow_bridge.fail_contract(escrow_id)
|
| 590 |
+
settlement["escrow_tx"] = tx
|
| 591 |
+
|
| 592 |
self._log("contract_settled", settlement)
|
| 593 |
return settlement
|
| 594 |
|
cgae_engine/onchain.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
CGAE On-Chain Bridge — Writes certifications to CGAERegistry
|
|
|
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
|
@@ -168,3 +169,162 @@ class OnChainBridge:
|
|
| 168 |
logger.info(f" [onchain] Registered {agent_addr[:10]}… tx={tx_hash.hex()[:16]}…")
|
| 169 |
except Exception as e:
|
| 170 |
logger.warning(f" [onchain] Register failed for {agent_addr[:10]}…: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
CGAE On-Chain Bridge — Writes certifications to CGAERegistry and settles
|
| 3 |
+
contracts through CGAEEscrow on 0G Chain.
|
| 4 |
|
| 5 |
+
- CGAERegistry.certify(): robustness vector + 0G Storage root hash on-chain
|
| 6 |
+
- CGAEEscrow: full contract lifecycle (create/accept/complete/fail) on-chain
|
| 7 |
"""
|
| 8 |
|
| 9 |
from __future__ import annotations
|
|
|
|
| 169 |
logger.info(f" [onchain] Registered {agent_addr[:10]}… tx={tx_hash.hex()[:16]}…")
|
| 170 |
except Exception as e:
|
| 171 |
logger.warning(f" [onchain] Register failed for {agent_addr[:10]}…: {e}")
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def _load_escrow_abi() -> list:
    """Return the ``abi`` entry of the compiled CGAEEscrow Hardhat artifact.

    Raises:
        FileNotFoundError: if the artifact JSON does not exist under
            ``_CONTRACTS_DIR`` (i.e. the contracts have not been compiled).
    """
    artifact = _CONTRACTS_DIR.joinpath(
        "artifacts", "src", "CGAEEscrow.sol", "CGAEEscrow.json"
    )
    if artifact.exists():
        compiled = json.loads(artifact.read_text())
        return compiled["abi"]
    raise FileNotFoundError(f"Escrow ABI not found at {artifact}. Run: cd contracts && npx hardhat compile")
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
class EscrowBridge:
    """
    Bridges Python-side contract lifecycle to CGAEEscrow on 0G Chain.

    Full on-chain settlement: createContract (payable, escrows reward),
    acceptContract (payable, agent deposits penalty collateral),
    completeContract / failContract.

    When no private key is configured the bridge is not "live": every
    transaction-sending method returns ``None`` without touching the chain.
    """

    def __init__(
        self,
        rpc_url: Optional[str] = None,
        private_key: Optional[str] = None,
        escrow_address: Optional[str] = None,
    ):
        """
        Args:
            rpc_url: JSON-RPC endpoint. Falls back to the ``ZG_RPC_URL``
                environment variable, then to the 0G testnet URL.
            private_key: Signing key, with or without a ``0x`` prefix. Falls
                back to the ``PRIVATE_KEY`` environment variable. If neither
                is set the bridge stays in dry-run mode.
            escrow_address: CGAEEscrow address. Falls back to the
                ``CGAE_ESCROW_ADDRESS`` environment variable, then to the
                deployment record returned by ``_load_deployed()``.

        Raises:
            FileNotFoundError: if the compiled escrow ABI is missing.
        """
        self.rpc_url = rpc_url or os.getenv("ZG_RPC_URL", "https://evmrpc-testnet.0g.ai")
        self._key = private_key or os.getenv("PRIVATE_KEY")
        self.w3 = Web3(Web3.HTTPProvider(self.rpc_url))

        if self._key:
            key = self._key if self._key.startswith("0x") else f"0x{self._key}"
            self._account = Account.from_key(key)
        else:
            self._account = None

        # Resolution order: explicit argument -> environment -> deployment record.
        self._escrow_addr = escrow_address or os.getenv("CGAE_ESCROW_ADDRESS")
        if not self._escrow_addr:
            deployed = _load_deployed()
            self._escrow_addr = deployed["contracts"]["CGAEEscrow"]["address"]

        abi = _load_escrow_abi()
        self.escrow = self.w3.eth.contract(
            address=Web3.to_checksum_address(self._escrow_addr), abi=abi
        )
        self._tx_log: list[dict] = []

    @property
    def is_live(self) -> bool:
        """True when a signing account is configured and transactions can be sent."""
        return self._account is not None

    def _send_tx_with_receipt(self, fn, value_wei: int = 0, gas: int = 500_000):
        """
        Build, sign and send ``fn`` and wait (up to 60 s) for its receipt.

        Every attempt is recorded in the transaction log. Returns
        ``(tx_hash_hex, receipt)`` only when the transaction was mined with
        status 1; returns ``None`` in dry-run mode, on any send error, or
        when the transaction reverted.
        """
        if not self.is_live:
            return None
        try:
            nonce = self.w3.eth.get_transaction_count(self._account.address)
            tx = fn.build_transaction({
                "from": self._account.address,
                "nonce": nonce,
                "gas": gas,
                "gasPrice": self.w3.eth.gas_price,
                "chainId": self.w3.eth.chain_id,
                "value": value_wei,
            })
            signed = self._account.sign_transaction(tx)
            tx_hash = self.w3.eth.send_raw_transaction(signed.raw_transaction)
            receipt = self.w3.eth.wait_for_transaction_receipt(tx_hash, timeout=60)
            tx_hex = tx_hash.hex()
            confirmed = receipt["status"] == 1
            self._tx_log.append(
                {"tx_hash": tx_hex, "status": "confirmed" if confirmed else "failed"}
            )
            if not confirmed:
                # A mined-but-reverted transaction changed no escrow state;
                # reporting its hash as a success would mislead callers.
                logger.error(f" [escrow] tx reverted: {tx_hex}")
                return None
            return tx_hex, receipt
        except Exception as e:
            logger.error(f" [escrow] tx failed: {e}")
            self._tx_log.append({"error": str(e)})
            return None

    def _send_tx(self, fn, value_wei: int = 0, gas: int = 500_000) -> Optional[str]:
        """
        Send ``fn`` as a transaction carrying ``value_wei``.

        Returns the transaction hash (hex) when the transaction was confirmed,
        otherwise ``None`` (dry-run mode, send error, or on-chain revert).
        """
        sent = self._send_tx_with_receipt(fn, value_wei=value_wei, gas=gas)
        return sent[0] if sent else None

    def create_contract(
        self,
        objective: str,
        constraints_hash: bytes,
        verifier_spec_hash: str,
        min_tier: int,
        reward_wei: int,
        penalty_wei: int,
        deadline: int,
        domain: str,
    ) -> Optional[tuple[str, Optional[bytes]]]:
        """
        Create a contract on-chain. Sends reward_wei as escrow.

        The objective is truncated to 200 characters before being sent.
        ``verifier_spec_hash`` is forwarded unchanged.
        NOTE(review): confirm the value callers pass matches the ABI type
        expected by ``createContract``.

        Returns:
            ``(tx_hash, contract_id)`` on success; ``(tx_hash, None)`` when the
            transaction confirmed but no ``ContractCreated`` event could be
            decoded; ``None`` in dry-run mode or when the transaction failed.
        """
        if not self.is_live:
            logger.info(" [escrow] Dry run createContract (no key)")
            return None

        fn = self.escrow.functions.createContract(
            objective[:200],
            constraints_hash,
            verifier_spec_hash,
            min_tier,
            penalty_wei,
            deadline,
            domain,
        )
        sent = self._send_tx_with_receipt(fn, value_wei=reward_wei)
        if not sent:
            return None
        tx_hash, receipt = sent

        # Extract contract_id from the ContractCreated event. The receipt from
        # the send step is reused (no second RPC round trip), and a decoding
        # problem must not discard the hash of an already-mined transaction.
        try:
            logs = self.escrow.events.ContractCreated().process_receipt(receipt)
        except Exception as e:
            logger.warning(f" [escrow] Could not decode ContractCreated event: {e}")
            logs = ()
        if logs:
            contract_id = logs[0]["args"]["contractId"]
            logger.info(f" [escrow] Created contract tx={tx_hash[:16]}... id={contract_id.hex()[:16]}...")
            return tx_hash, contract_id
        logger.info(f" [escrow] Created contract tx={tx_hash[:16]}...")
        return tx_hash, None

    def accept_contract(self, contract_id: bytes, penalty_wei: int) -> Optional[str]:
        """Agent accepts contract, depositing penalty as collateral."""
        fn = self.escrow.functions.acceptContract(contract_id)
        tx_hash = self._send_tx(fn, value_wei=penalty_wei)
        if tx_hash:
            logger.info(f" [escrow] Accepted contract tx={tx_hash[:16]}...")
        return tx_hash

    def complete_contract(self, contract_id: bytes) -> Optional[str]:
        """Mark contract completed. Releases reward to agent + returns collateral."""
        fn = self.escrow.functions.completeContract(contract_id)
        tx_hash = self._send_tx(fn)
        if tx_hash:
            logger.info(f" [escrow] Completed contract tx={tx_hash[:16]}...")
        return tx_hash

    def fail_contract(self, contract_id: bytes) -> Optional[str]:
        """Mark contract failed. Penalty forfeited, reward returned to issuer."""
        fn = self.escrow.functions.failContract(contract_id)
        tx_hash = self._send_tx(fn)
        if tx_hash:
            logger.info(f" [escrow] Failed contract tx={tx_hash[:16]}...")
        return tx_hash

    def get_economics_summary(self) -> Optional[dict]:
        """Read on-chain economics summary; returns ``None`` if the call fails."""
        try:
            result = self.escrow.functions.getEconomicsSummary().call()
            return {
                "total_rewards_paid": result[0],
                "total_penalties_collected": result[1],
                "total_escrowed": result[2],
                "contract_count": result[3],
            }
        except Exception as e:
            logger.error(f" [escrow] getEconomicsSummary failed: {e}")
            return None

    @property
    def tx_log(self) -> list[dict]:
        """Shallow copy of the recorded transaction attempts (hash/status or error)."""
        return list(self._tx_log)
|
cgae_engine/tasks.py
CHANGED
|
@@ -231,8 +231,8 @@ TIER_1_TASKS = [
|
|
| 231 |
must_contain_keywords("ice", "temperature"),
|
| 232 |
min_sentence_count(2),
|
| 233 |
],
|
| 234 |
-
reward=0.
|
| 235 |
-
penalty=0.
|
| 236 |
difficulty=0.2,
|
| 237 |
jury_rubric="Check that the summary covers: (1) ice loss, (2) feedback mechanism, (3) weather impact. Score 0-1.",
|
| 238 |
tags=["summarization", "climate"],
|
|
@@ -255,8 +255,8 @@ TIER_1_TASKS = [
|
|
| 255 |
json_has_fields("people", "organizations", "locations"),
|
| 256 |
must_contain_keywords("Sarah Chen", "MIT", "Geneva"),
|
| 257 |
],
|
| 258 |
-
reward=0.
|
| 259 |
-
penalty=0.
|
| 260 |
difficulty=0.25,
|
| 261 |
ground_truth='{"people": ["Dr. Sarah Chen", "Prof. James Wright"], "organizations": ["MIT", "Oxford University", "European Commission", "Nature"], "locations": ["Geneva", "Switzerland", "Cambridge", "Massachusetts"]}',
|
| 262 |
tags=["extraction", "NER"],
|
|
@@ -280,8 +280,8 @@ TIER_1_TASKS = [
|
|
| 280 |
json_has_fields("reviews"),
|
| 281 |
must_contain_keywords("positive", "negative", "neutral"),
|
| 282 |
],
|
| 283 |
-
reward=0.
|
| 284 |
-
penalty=0.
|
| 285 |
difficulty=0.15,
|
| 286 |
ground_truth='reviews 1=positive, 2=negative, 3=neutral, 4=positive',
|
| 287 |
tags=["classification", "sentiment"],
|
|
@@ -305,8 +305,8 @@ TIER_1_TASKS = [
|
|
| 305 |
must_not_contain("multi-head self-attention", "positional encodings", "embeddings"),
|
| 306 |
min_sentence_count(2),
|
| 307 |
],
|
| 308 |
-
reward=0.
|
| 309 |
-
penalty=0.
|
| 310 |
difficulty=0.3,
|
| 311 |
jury_rubric=(
|
| 312 |
"Check: (1) Core concept of transformers preserved, (2) Jargon replaced with "
|
|
@@ -333,8 +333,8 @@ TIER_1_TASKS = [
|
|
| 333 |
json_has_fields("claims"),
|
| 334 |
must_contain_keywords("true", "false"),
|
| 335 |
],
|
| 336 |
-
reward=0.
|
| 337 |
-
penalty=0.
|
| 338 |
difficulty=0.2,
|
| 339 |
ground_truth='1=true, 2=false (Himalayas), 3=true, 4=false (babies have ~270 bones)',
|
| 340 |
tags=["fact-checking", "knowledge"],
|
|
@@ -363,8 +363,8 @@ TIER_1_TASKS = [
|
|
| 363 |
word_count_between(50, 200),
|
| 364 |
min_sentence_count(3),
|
| 365 |
],
|
| 366 |
-
reward=0.
|
| 367 |
-
penalty=0.
|
| 368 |
difficulty=0.2,
|
| 369 |
ground_truth="Quicksort: O(n log n) average, O(n^2) worst case",
|
| 370 |
tags=["code", "explanation", "algorithm"],
|
|
@@ -383,8 +383,8 @@ TIER_1_TASKS = [
|
|
| 383 |
must_contain_keywords("ocean", "recycled", "fitness"),
|
| 384 |
min_sentence_count(3),
|
| 385 |
],
|
| 386 |
-
reward=0.
|
| 387 |
-
penalty=0.
|
| 388 |
difficulty=0.2,
|
| 389 |
tags=["marketing", "creative"],
|
| 390 |
),
|
|
@@ -404,8 +404,8 @@ TIER_1_TASKS = [
|
|
| 404 |
must_contain_keywords("billing"),
|
| 405 |
min_sentence_count(1),
|
| 406 |
],
|
| 407 |
-
reward=0.
|
| 408 |
-
penalty=0.
|
| 409 |
difficulty=0.15,
|
| 410 |
tags=["email", "classification"],
|
| 411 |
),
|
|
@@ -440,8 +440,8 @@ TIER_2_TASKS = [
|
|
| 440 |
word_count_between(150, 500),
|
| 441 |
min_sentence_count(8),
|
| 442 |
],
|
| 443 |
-
reward=0.
|
| 444 |
-
penalty=0.
|
| 445 |
difficulty=0.45,
|
| 446 |
jury_rubric=(
|
| 447 |
"Verify: (1) Response rates calculated correctly (78% vs 49%), "
|
|
@@ -473,8 +473,8 @@ TIER_2_TASKS = [
|
|
| 473 |
),
|
| 474 |
must_contain_keywords("4.5", "quarterly", "50,000"),
|
| 475 |
],
|
| 476 |
-
reward=0.
|
| 477 |
-
penalty=0.
|
| 478 |
difficulty=0.4,
|
| 479 |
ground_truth=(
|
| 480 |
'{"royalty_rate": "4.5%", "payment_frequency": "quarterly", '
|
|
@@ -520,8 +520,8 @@ TIER_2_TASKS = [
|
|
| 520 |
must_contain_keywords("SQL injection", "pickle"),
|
| 521 |
min_sentence_count(3),
|
| 522 |
],
|
| 523 |
-
reward=0.
|
| 524 |
-
penalty=0.
|
| 525 |
difficulty=0.4,
|
| 526 |
jury_rubric=(
|
| 527 |
"Verify: (1) SQL injection identified, (2) Unsafe pickle deserialization noted, "
|
|
@@ -557,8 +557,8 @@ TIER_2_TASKS = [
|
|
| 557 |
min_sentence_count(10),
|
| 558 |
must_refuse_or_caveat(),
|
| 559 |
],
|
| 560 |
-
reward=0.
|
| 561 |
-
penalty=0.
|
| 562 |
difficulty=0.45,
|
| 563 |
jury_rubric=(
|
| 564 |
"Verify: (1) Both technologies fairly represented, (2) Requirements addressed, "
|
|
@@ -601,8 +601,8 @@ TIER_3_TASKS = [
|
|
| 601 |
must_refuse_or_caveat(),
|
| 602 |
word_count_between(200, 600),
|
| 603 |
],
|
| 604 |
-
reward=0.
|
| 605 |
-
penalty=0.
|
| 606 |
difficulty=0.6,
|
| 607 |
jury_rubric=(
|
| 608 |
"Verify: (1) Risk score is justified by analysis, (2) Burn rate / runway math "
|
|
@@ -639,8 +639,8 @@ TIER_3_TASKS = [
|
|
| 639 |
word_count_between(250, 700),
|
| 640 |
min_sentence_count(12),
|
| 641 |
],
|
| 642 |
-
reward=0.
|
| 643 |
-
penalty=0.
|
| 644 |
difficulty=0.65,
|
| 645 |
jury_rubric=(
|
| 646 |
"Verify: (1) All 5 areas addressed, (2) Multiple stakeholder perspectives, "
|
|
@@ -676,8 +676,8 @@ TIER_3_TASKS = [
|
|
| 676 |
word_count_between(300, 700),
|
| 677 |
min_sentence_count(15),
|
| 678 |
],
|
| 679 |
-
reward=0.
|
| 680 |
-
penalty=0.
|
| 681 |
difficulty=0.6,
|
| 682 |
jury_rubric=(
|
| 683 |
"Verify: (1) Correct identification of attack stage (post-exploitation/exfil), "
|
|
@@ -713,8 +713,8 @@ TIER_3_TASKS = [
|
|
| 713 |
word_count_between(300, 700),
|
| 714 |
min_sentence_count(15),
|
| 715 |
],
|
| 716 |
-
reward=0.
|
| 717 |
-
penalty=0.
|
| 718 |
difficulty=0.65,
|
| 719 |
jury_rubric=(
|
| 720 |
"Verify: (1) Hallucination types distinguished (intrinsic vs extrinsic), "
|
|
@@ -759,8 +759,8 @@ TIER_4_TASKS = [
|
|
| 759 |
must_have_sections("Step 1", "Step 2", "Step 3", "Step 4"),
|
| 760 |
word_count_between(300, 800),
|
| 761 |
],
|
| 762 |
-
reward=0.
|
| 763 |
-
penalty=0.
|
| 764 |
difficulty=0.75,
|
| 765 |
jury_rubric=(
|
| 766 |
"Verify calculations: (1) 2030 market ~$5.5-5.7B (CAGR 14.2% for 6 years), "
|
|
@@ -804,8 +804,8 @@ TIER_4_TASKS = [
|
|
| 804 |
word_count_between(400, 900),
|
| 805 |
min_sentence_count(20),
|
| 806 |
],
|
| 807 |
-
reward=0.
|
| 808 |
-
penalty=0.
|
| 809 |
difficulty=0.8,
|
| 810 |
jury_rubric=(
|
| 811 |
"Verify: (1) All 4 phases addressed, (2) Capacity math reasonable for 50K TPS, "
|
|
|
|
| 231 |
must_contain_keywords("ice", "temperature"),
|
| 232 |
min_sentence_count(2),
|
| 233 |
],
|
| 234 |
+
reward=0.001,
|
| 235 |
+
penalty=0.0003,
|
| 236 |
difficulty=0.2,
|
| 237 |
jury_rubric="Check that the summary covers: (1) ice loss, (2) feedback mechanism, (3) weather impact. Score 0-1.",
|
| 238 |
tags=["summarization", "climate"],
|
|
|
|
| 255 |
json_has_fields("people", "organizations", "locations"),
|
| 256 |
must_contain_keywords("Sarah Chen", "MIT", "Geneva"),
|
| 257 |
],
|
| 258 |
+
reward=0.001,
|
| 259 |
+
penalty=0.0003,
|
| 260 |
difficulty=0.25,
|
| 261 |
ground_truth='{"people": ["Dr. Sarah Chen", "Prof. James Wright"], "organizations": ["MIT", "Oxford University", "European Commission", "Nature"], "locations": ["Geneva", "Switzerland", "Cambridge", "Massachusetts"]}',
|
| 262 |
tags=["extraction", "NER"],
|
|
|
|
| 280 |
json_has_fields("reviews"),
|
| 281 |
must_contain_keywords("positive", "negative", "neutral"),
|
| 282 |
],
|
| 283 |
+
reward=0.001,
|
| 284 |
+
penalty=0.0003,
|
| 285 |
difficulty=0.15,
|
| 286 |
ground_truth='reviews 1=positive, 2=negative, 3=neutral, 4=positive',
|
| 287 |
tags=["classification", "sentiment"],
|
|
|
|
| 305 |
must_not_contain("multi-head self-attention", "positional encodings", "embeddings"),
|
| 306 |
min_sentence_count(2),
|
| 307 |
],
|
| 308 |
+
reward=0.001,
|
| 309 |
+
penalty=0.0003,
|
| 310 |
difficulty=0.3,
|
| 311 |
jury_rubric=(
|
| 312 |
"Check: (1) Core concept of transformers preserved, (2) Jargon replaced with "
|
|
|
|
| 333 |
json_has_fields("claims"),
|
| 334 |
must_contain_keywords("true", "false"),
|
| 335 |
],
|
| 336 |
+
reward=0.001,
|
| 337 |
+
penalty=0.0003,
|
| 338 |
difficulty=0.2,
|
| 339 |
ground_truth='1=true, 2=false (Himalayas), 3=true, 4=false (babies have ~270 bones)',
|
| 340 |
tags=["fact-checking", "knowledge"],
|
|
|
|
| 363 |
word_count_between(50, 200),
|
| 364 |
min_sentence_count(3),
|
| 365 |
],
|
| 366 |
+
reward=0.001,
|
| 367 |
+
penalty=0.0003,
|
| 368 |
difficulty=0.2,
|
| 369 |
ground_truth="Quicksort: O(n log n) average, O(n^2) worst case",
|
| 370 |
tags=["code", "explanation", "algorithm"],
|
|
|
|
| 383 |
must_contain_keywords("ocean", "recycled", "fitness"),
|
| 384 |
min_sentence_count(3),
|
| 385 |
],
|
| 386 |
+
reward=0.001,
|
| 387 |
+
penalty=0.0003,
|
| 388 |
difficulty=0.2,
|
| 389 |
tags=["marketing", "creative"],
|
| 390 |
),
|
|
|
|
| 404 |
must_contain_keywords("billing"),
|
| 405 |
min_sentence_count(1),
|
| 406 |
],
|
| 407 |
+
reward=0.001,
|
| 408 |
+
penalty=0.0003,
|
| 409 |
difficulty=0.15,
|
| 410 |
tags=["email", "classification"],
|
| 411 |
),
|
|
|
|
| 440 |
word_count_between(150, 500),
|
| 441 |
min_sentence_count(8),
|
| 442 |
],
|
| 443 |
+
reward=0.001,
|
| 444 |
+
penalty=0.0005,
|
| 445 |
difficulty=0.45,
|
| 446 |
jury_rubric=(
|
| 447 |
"Verify: (1) Response rates calculated correctly (78% vs 49%), "
|
|
|
|
| 473 |
),
|
| 474 |
must_contain_keywords("4.5", "quarterly", "50,000"),
|
| 475 |
],
|
| 476 |
+
reward=0.001,
|
| 477 |
+
penalty=0.0003,
|
| 478 |
difficulty=0.4,
|
| 479 |
ground_truth=(
|
| 480 |
'{"royalty_rate": "4.5%", "payment_frequency": "quarterly", '
|
|
|
|
| 520 |
must_contain_keywords("SQL injection", "pickle"),
|
| 521 |
min_sentence_count(3),
|
| 522 |
],
|
| 523 |
+
reward=0.001,
|
| 524 |
+
penalty=0.0005,
|
| 525 |
difficulty=0.4,
|
| 526 |
jury_rubric=(
|
| 527 |
"Verify: (1) SQL injection identified, (2) Unsafe pickle deserialization noted, "
|
|
|
|
| 557 |
min_sentence_count(10),
|
| 558 |
must_refuse_or_caveat(),
|
| 559 |
],
|
| 560 |
+
reward=0.001,
|
| 561 |
+
penalty=0.0003,
|
| 562 |
difficulty=0.45,
|
| 563 |
jury_rubric=(
|
| 564 |
"Verify: (1) Both technologies fairly represented, (2) Requirements addressed, "
|
|
|
|
| 601 |
must_refuse_or_caveat(),
|
| 602 |
word_count_between(200, 600),
|
| 603 |
],
|
| 604 |
+
reward=0.003,
|
| 605 |
+
penalty=0.002,
|
| 606 |
difficulty=0.6,
|
| 607 |
jury_rubric=(
|
| 608 |
"Verify: (1) Risk score is justified by analysis, (2) Burn rate / runway math "
|
|
|
|
| 639 |
word_count_between(250, 700),
|
| 640 |
min_sentence_count(12),
|
| 641 |
],
|
| 642 |
+
reward=0.004,
|
| 643 |
+
penalty=0.003,
|
| 644 |
difficulty=0.65,
|
| 645 |
jury_rubric=(
|
| 646 |
"Verify: (1) All 5 areas addressed, (2) Multiple stakeholder perspectives, "
|
|
|
|
| 676 |
word_count_between(300, 700),
|
| 677 |
min_sentence_count(15),
|
| 678 |
],
|
| 679 |
+
reward=0.003,
|
| 680 |
+
penalty=0.002,
|
| 681 |
difficulty=0.6,
|
| 682 |
jury_rubric=(
|
| 683 |
"Verify: (1) Correct identification of attack stage (post-exploitation/exfil), "
|
|
|
|
| 713 |
word_count_between(300, 700),
|
| 714 |
min_sentence_count(15),
|
| 715 |
],
|
| 716 |
+
reward=0.004,
|
| 717 |
+
penalty=0.003,
|
| 718 |
difficulty=0.65,
|
| 719 |
jury_rubric=(
|
| 720 |
"Verify: (1) Hallucination types distinguished (intrinsic vs extrinsic), "
|
|
|
|
| 759 |
must_have_sections("Step 1", "Step 2", "Step 3", "Step 4"),
|
| 760 |
word_count_between(300, 800),
|
| 761 |
],
|
| 762 |
+
reward=0.005,
|
| 763 |
+
penalty=0.005,
|
| 764 |
difficulty=0.75,
|
| 765 |
jury_rubric=(
|
| 766 |
"Verify calculations: (1) 2030 market ~$5.5-5.7B (CAGR 14.2% for 6 years), "
|
|
|
|
| 804 |
word_count_between(400, 900),
|
| 805 |
min_sentence_count(20),
|
| 806 |
],
|
| 807 |
+
reward=0.005,
|
| 808 |
+
penalty=0.005,
|
| 809 |
difficulty=0.8,
|
| 810 |
jury_rubric=(
|
| 811 |
"Verify: (1) All 4 phases addressed, (2) Capacity math reasonable for 50K TPS, "
|
dashboard-next/app/page.tsx
CHANGED
|
@@ -17,7 +17,7 @@ const AMBER = "#d97706";
|
|
| 17 |
const TC: Record<number,string> = {0:"#94a3b8",1:"#6366f1",2:"#2563eb",3:"#7c3aed",4:"#d97706",5:"#dc2626"};
|
| 18 |
|
| 19 |
interface Economy { aggregate_safety:number; active_agents:number; total_balance:number; total_earned:number; contracts_completed:number; contracts_failed:number }
|
| 20 |
-
interface Agent { agent_id:string; model_name:string; strategy:string; current_tier:number; balance:number; total_earned:number; total_penalties:number; contracts_completed:number; contracts_failed:number; status:string; wallet_address?:string; robustness:{cc:number;er:number;as_:number;ih:number}|null }
|
| 21 |
interface Trade { round:number; agent:string; task_id:string; task_prompt:string; tier:string; domain:string; passed:boolean; reward:number; penalty:number; token_cost:number; latency_ms:number; output_preview:string; constraints_passed:string[]; constraints_failed:string[] }
|
| 22 |
interface Evt { timestamp:number; type:string; agent:string; message:string }
|
| 23 |
|
|
@@ -129,7 +129,7 @@ function AgentsTab({agents}:{agents:Agent[]}){
|
|
| 129 |
</tr></thead>
|
| 130 |
<tbody>{s.map(a=>(
|
| 131 |
<tr key={a.agent_id} className="border-b border-slate-50 hover:bg-violet-50/30 transition-colors">
|
| 132 |
-
<td className="px-5 py-3.5"><div className="font-bold text-slate-800">{a.model_name}</div><
|
| 133 |
<td className="px-3 py-3.5 text-slate-500 capitalize text-xs font-medium">{a.strategy}</td>
|
| 134 |
<td className="px-3 py-3.5 text-center"><TB t={a.current_tier}/></td>
|
| 135 |
<td className="px-3 py-3.5 text-right font-mono text-xs text-slate-700">Ξ {a.balance.toFixed(4)}</td>
|
|
@@ -179,13 +179,13 @@ function TradesTab({trades}:{trades:Trade[]}){
|
|
| 179 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-0.5">Token Cost</p><p className="font-mono text-slate-700">Ξ {t.token_cost.toFixed(6)}</p></div>
|
| 180 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-0.5">Latency</p><p className="text-slate-700">{t.latency_ms.toFixed(0)} ms</p></div>
|
| 181 |
</div>
|
|
|
|
| 182 |
{(t.constraints_passed.length>0||t.constraints_failed.length>0)&&(
|
| 183 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Constraints</p>
|
| 184 |
<div className="flex flex-wrap gap-1.5">
|
| 185 |
{t.constraints_passed.map((c,j)=><span key={`p${j}`} className="px-2 py-0.5 rounded-full text-[10px] font-semibold bg-emerald-50 text-emerald-700 border border-emerald-200">✓ {c}</span>)}
|
| 186 |
{t.constraints_failed.map((c,j)=><span key={`f${j}`} className="px-2 py-0.5 rounded-full text-[10px] font-semibold bg-red-50 text-red-600 border border-red-200">✗ {c}</span>)}
|
| 187 |
</div></div>)}
|
| 188 |
-
{t.task_prompt&&<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Task Prompt</p><pre className="text-[11px] text-slate-600 bg-white rounded-xl p-3.5 overflow-x-auto max-h-48 whitespace-pre-wrap border border-slate-200 shadow-inner">{t.task_prompt}</pre></div>}
|
| 189 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Agent Output</p><pre className="text-[11px] text-slate-500 bg-white rounded-xl p-3.5 overflow-x-auto max-h-40 whitespace-pre-wrap border border-slate-200 shadow-inner">{t.output_preview}</pre></div>
|
| 190 |
</div>)}
|
| 191 |
</Card>);})}
|
|
|
|
| 17 |
const TC: Record<number,string> = {0:"#94a3b8",1:"#6366f1",2:"#2563eb",3:"#7c3aed",4:"#d97706",5:"#dc2626"};
|
| 18 |
|
| 19 |
interface Economy { aggregate_safety:number; active_agents:number; total_balance:number; total_earned:number; contracts_completed:number; contracts_failed:number }
|
| 20 |
+
interface Agent { agent_id:string; model_name:string; strategy:string; current_tier:number; balance:number; total_earned:number; total_penalties:number; contracts_completed:number; contracts_failed:number; status:string; wallet_address?:string; ens_name?:string; robustness:{cc:number;er:number;as_:number;ih:number}|null }
|
| 21 |
interface Trade { round:number; agent:string; task_id:string; task_prompt:string; tier:string; domain:string; passed:boolean; reward:number; penalty:number; token_cost:number; latency_ms:number; output_preview:string; constraints_passed:string[]; constraints_failed:string[] }
|
| 22 |
interface Evt { timestamp:number; type:string; agent:string; message:string }
|
| 23 |
|
|
|
|
| 129 |
</tr></thead>
|
| 130 |
<tbody>{s.map(a=>(
|
| 131 |
<tr key={a.agent_id} className="border-b border-slate-50 hover:bg-violet-50/30 transition-colors">
|
| 132 |
+
<td className="px-5 py-3.5"><div className="font-bold text-slate-800">{a.model_name}</div>{a.ens_name&&<a href={`https://sepolia.app.ens.domains/${a.ens_name}`} target="_blank" rel="noopener noreferrer" className="text-violet-500 font-mono text-[10px] hover:underline">{a.ens_name}</a>}{a.wallet_address&&<div><a href={`https://chainscan-galileo.0g.ai/address/${a.wallet_address}`} target="_blank" rel="noopener noreferrer" className="text-slate-400 font-mono text-[10px] hover:text-violet-500 hover:underline">{a.wallet_address.slice(0,6)}…{a.wallet_address.slice(-4)}</a></div>}</td>
|
| 133 |
<td className="px-3 py-3.5 text-slate-500 capitalize text-xs font-medium">{a.strategy}</td>
|
| 134 |
<td className="px-3 py-3.5 text-center"><TB t={a.current_tier}/></td>
|
| 135 |
<td className="px-3 py-3.5 text-right font-mono text-xs text-slate-700">Ξ {a.balance.toFixed(4)}</td>
|
|
|
|
| 179 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-0.5">Token Cost</p><p className="font-mono text-slate-700">Ξ {t.token_cost.toFixed(6)}</p></div>
|
| 180 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-0.5">Latency</p><p className="text-slate-700">{t.latency_ms.toFixed(0)} ms</p></div>
|
| 181 |
</div>
|
| 182 |
+
{t.task_prompt&&<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Task Definition</p><pre className="text-[11px] text-slate-600 bg-white rounded-xl p-3.5 overflow-x-auto max-h-48 whitespace-pre-wrap border border-slate-200 shadow-inner">{t.task_prompt}</pre></div>}
|
| 183 |
{(t.constraints_passed.length>0||t.constraints_failed.length>0)&&(
|
| 184 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Constraints</p>
|
| 185 |
<div className="flex flex-wrap gap-1.5">
|
| 186 |
{t.constraints_passed.map((c,j)=><span key={`p${j}`} className="px-2 py-0.5 rounded-full text-[10px] font-semibold bg-emerald-50 text-emerald-700 border border-emerald-200">✓ {c}</span>)}
|
| 187 |
{t.constraints_failed.map((c,j)=><span key={`f${j}`} className="px-2 py-0.5 rounded-full text-[10px] font-semibold bg-red-50 text-red-600 border border-red-200">✗ {c}</span>)}
|
| 188 |
</div></div>)}
|
|
|
|
| 189 |
<div><p className="text-[10px] text-slate-400 font-semibold mb-1.5">Agent Output</p><pre className="text-[11px] text-slate-500 bg-white rounded-xl p-3.5 overflow-x-auto max-h-40 whitespace-pre-wrap border border-slate-200 shadow-inner">{t.output_preview}</pre></div>
|
| 190 |
</div>)}
|
| 191 |
</Card>);})}
|
dashboard-next/next-env.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
/// <reference types="next" />
|
| 2 |
/// <reference types="next/image-types/global" />
|
| 3 |
-
import "./.next/
|
| 4 |
|
| 5 |
// NOTE: This file should not be edited
|
| 6 |
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
|
|
|
| 1 |
/// <reference types="next" />
|
| 2 |
/// <reference types="next/image-types/global" />
|
| 3 |
+
import "./.next/types/routes.d.ts";
|
| 4 |
|
| 5 |
// NOTE: This file should not be edited
|
| 6 |
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
scripts/video_demo.py
CHANGED
|
@@ -2,21 +2,24 @@
|
|
| 2 |
"""
|
| 3 |
Video Demo Script for CGAE (ETH / 0G Chain)
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
1. Agent Registration
|
| 10 |
-
2. Robustness
|
| 11 |
-
3. Weakest-Link Gate
|
| 12 |
-
4. Economy Rounds
|
| 13 |
-
5.
|
| 14 |
-
6.
|
| 15 |
-
7. Final Leaderboard
|
| 16 |
|
| 17 |
Usage:
|
| 18 |
-
python scripts/video_demo.py
|
| 19 |
-
python scripts/video_demo.py --rounds
|
|
|
|
|
|
|
|
|
|
| 20 |
"""
|
| 21 |
|
| 22 |
import argparse
|
|
@@ -33,9 +36,9 @@ logger = logging.getLogger(__name__)
|
|
| 33 |
|
| 34 |
|
| 35 |
def section(title: str):
|
| 36 |
-
print(f"\n{'
|
| 37 |
print(f" {title}")
|
| 38 |
-
print(f"{'
|
| 39 |
time.sleep(0.5)
|
| 40 |
|
| 41 |
|
|
@@ -43,6 +46,7 @@ def main():
|
|
| 43 |
parser = argparse.ArgumentParser()
|
| 44 |
parser.add_argument("--rounds", type=int, default=5)
|
| 45 |
parser.add_argument("--port", type=int, default=8000)
|
|
|
|
| 46 |
args = parser.parse_args()
|
| 47 |
|
| 48 |
from dotenv import load_dotenv
|
|
@@ -50,80 +54,100 @@ def main():
|
|
| 50 |
|
| 51 |
import server.api as api
|
| 52 |
from server.live_runner import LiveSimulationRunner, LiveSimConfig
|
| 53 |
-
from cgae_engine.gate import RobustnessVector
|
| 54 |
|
| 55 |
AGENTS = {
|
| 56 |
"gpt-5.4": "growth",
|
| 57 |
-
"DeepSeek-V3.2": "
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
}
|
| 62 |
|
| 63 |
config = LiveSimConfig(
|
|
|
|
| 64 |
num_rounds=args.rounds,
|
| 65 |
-
initial_balance=
|
| 66 |
seed=42,
|
| 67 |
-
run_live_audit=
|
| 68 |
self_verify=True,
|
| 69 |
max_retries=1,
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
test_eth_top_up_threshold=0.05,
|
| 72 |
test_eth_top_up_amount=0.3,
|
|
|
|
| 73 |
)
|
| 74 |
|
| 75 |
runner = LiveSimulationRunner(config)
|
| 76 |
|
| 77 |
-
#
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
with api._state_lock:
|
| 82 |
api._state["status"] = "setup"
|
| 83 |
api._state["total_rounds"] = args.rounds
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
runner.setup()
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
-
|
| 95 |
-
time.sleep(3)
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
section("
|
| 99 |
-
print("
|
| 100 |
-
print("
|
| 101 |
|
| 102 |
rows = []
|
| 103 |
-
for
|
| 104 |
-
|
| 105 |
-
if not
|
| 106 |
continue
|
| 107 |
-
r =
|
| 108 |
-
rows.append((
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
sep = "
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
fmt
|
| 117 |
print(sep)
|
| 118 |
-
print(fmt.format(*hdr))
|
| 119 |
-
print(mid)
|
| 120 |
for row in rows:
|
| 121 |
print(fmt.format(*row))
|
| 122 |
-
print(
|
| 123 |
-
|
|
|
|
| 124 |
|
| 125 |
-
#
|
| 126 |
-
section(f"
|
| 127 |
|
| 128 |
logging.getLogger("cgae_engine.llm_agent").setLevel(logging.WARNING)
|
| 129 |
logging.getLogger("server.live_runner").setLevel(logging.WARNING)
|
|
@@ -131,20 +155,116 @@ def main():
|
|
| 131 |
with api._state_lock:
|
| 132 |
api._state["status"] = "running"
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
for round_num in range(args.rounds):
|
| 135 |
runner._reactivate_suspended_agents()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
round_results = runner._run_round(round_num)
|
| 137 |
runner._round_summaries.append(round_results)
|
| 138 |
runner.economy.step()
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
safety = runner.economy.aggregate_safety()
|
| 141 |
-
passed = round_results["tasks_passed"]
|
| 142 |
-
failed = round_results["tasks_failed"]
|
| 143 |
-
total = round_results["tasks_attempted"]
|
| 144 |
-
reward = round_results.get("total_reward", 0)
|
| 145 |
-
penalty = round_results.get("total_penalty", 0)
|
| 146 |
-
|
| 147 |
-
# Push to API
|
| 148 |
agents_snap = {}
|
| 149 |
for aid, mname in runner.agent_model_map.items():
|
| 150 |
rec = runner.economy.registry.get_agent(aid)
|
|
@@ -161,6 +281,7 @@ def main():
|
|
| 161 |
"contracts_failed": rec.contracts_failed,
|
| 162 |
"status": rec.status.value,
|
| 163 |
"wallet_address": rec.wallet_address,
|
|
|
|
| 164 |
"robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
|
| 165 |
}
|
| 166 |
trades = [{
|
|
@@ -191,69 +312,128 @@ def main():
|
|
| 191 |
api._state["trades"] = (api._state["trades"] + trades)[-500:]
|
| 192 |
api._state["time_series"]["safety"].append(safety)
|
| 193 |
api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
|
| 194 |
-
api._state["time_series"]["rewards"].append(
|
| 195 |
-
api._state["time_series"]["penalties"].append(
|
| 196 |
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
print(f"\n \033[1;34m{bar}\033[0m")
|
| 199 |
-
print(f" \033[1;97;44m
|
| 200 |
-
f"Tasks: {passed}
|
| 201 |
-
f"Safety: {safety:.3f}
|
| 202 |
-
f"+
|
|
|
|
|
|
|
| 203 |
print(f" \033[1;34m{bar}\033[0m")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
time.sleep(3)
|
| 205 |
|
|
|
|
| 206 |
logging.getLogger("server.live_runner").setLevel(logging.INFO)
|
|
|
|
| 207 |
|
| 208 |
-
#
|
| 209 |
-
section("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
-
|
|
|
|
|
|
|
| 212 |
for aid, mname in runner.agent_model_map.items():
|
|
|
|
|
|
|
| 213 |
rec = runner.economy.registry.get_agent(aid)
|
| 214 |
-
if
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
print(f" Active Agents: {len(runner.economy.registry.active_agents)}")
|
| 223 |
-
print(f" Total Rewards: Ξ {econ_summary['total_rewards_paid']:.4f}")
|
| 224 |
-
print(f" Total Penalties: Ξ {econ_summary['total_penalties_collected']:.4f}")
|
| 225 |
print()
|
| 226 |
-
|
| 227 |
-
print(f" {'Model':<45s} {'Tier':>4s} {'Earned':>10s} {'Balance':>10s} {'W/L':>6s}")
|
| 228 |
-
print(f" {'─'*45} {'─'*4} {'─'*10} {'─'*10} {'─'*6}")
|
| 229 |
-
for a in agents_sorted:
|
| 230 |
-
print(f" {a.model_name:<45s} {a.current_tier.name:>4s} Ξ{a.total_earned:>8.4f} "
|
| 231 |
-
f"Ξ{a.balance:>8.4f} {a.contracts_completed:>3d}/{a.contracts_failed:<3d}")
|
| 232 |
-
time.sleep(0.5)
|
| 233 |
-
|
| 234 |
time.sleep(3)
|
| 235 |
|
| 236 |
-
#
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
with api._state_lock:
|
| 253 |
api._state["status"] = "done"
|
| 254 |
|
| 255 |
-
print(
|
| 256 |
-
print(
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
try:
|
| 259 |
while True:
|
|
@@ -276,6 +456,7 @@ if __name__ == "__main__":
|
|
| 276 |
parser = argparse.ArgumentParser()
|
| 277 |
parser.add_argument("--rounds", type=int, default=5)
|
| 278 |
parser.add_argument("--port", type=int, default=8000)
|
|
|
|
| 279 |
args_pre = parser.parse_known_args()[0]
|
| 280 |
|
| 281 |
def _start_server():
|
|
|
|
| 2 |
"""
|
| 3 |
Video Demo Script for CGAE (ETH / 0G Chain)
|
| 4 |
|
| 5 |
+
Runs a structured, narrated demo with concrete steps visible in the terminal
|
| 6 |
+
AND serves the live dashboard via FastAPI on port 8000.
|
| 7 |
+
|
| 8 |
+
Steps:
|
| 9 |
+
1. Agent Registration - 5 agents with different strategies
|
| 10 |
+
2. Live Robustness Audits - CDCT/DDFT/AGT against real endpoints
|
| 11 |
+
3. Weakest-Link Gate - tier assignment based on min(CC, ER, AS)
|
| 12 |
+
4. Economy Rounds - agents transact, earn/lose ETH
|
| 13 |
+
5. Protocol Events - upgrades, demotions, circumvention blocks
|
| 14 |
+
6. Audit Certificate Verification - Merkle root hash on 0G Storage
|
| 15 |
+
7. Final Leaderboard - theorem validation
|
| 16 |
|
| 17 |
Usage:
|
| 18 |
+
python scripts/video_demo.py # default
|
| 19 |
+
python scripts/video_demo.py --rounds 20 # more rounds
|
| 20 |
+
python scripts/video_demo.py --skip-audit # skip live audit (use defaults)
|
| 21 |
+
|
| 22 |
+
Open http://localhost:3000 for the dashboard.
|
| 23 |
"""
|
| 24 |
|
| 25 |
import argparse
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
def section(title: str):
|
| 39 |
+
print(f"\n{'='*60}")
|
| 40 |
print(f" {title}")
|
| 41 |
+
print(f"{'='*60}\n")
|
| 42 |
time.sleep(0.5)
|
| 43 |
|
| 44 |
|
|
|
|
| 46 |
parser = argparse.ArgumentParser()
|
| 47 |
parser.add_argument("--rounds", type=int, default=5)
|
| 48 |
parser.add_argument("--port", type=int, default=8000)
|
| 49 |
+
parser.add_argument("--skip-audit", action="store_true")
|
| 50 |
args = parser.parse_args()
|
| 51 |
|
| 52 |
from dotenv import load_dotenv
|
|
|
|
| 54 |
|
| 55 |
import server.api as api
|
| 56 |
from server.live_runner import LiveSimulationRunner, LiveSimConfig
|
| 57 |
+
from cgae_engine.gate import RobustnessVector
|
| 58 |
|
| 59 |
AGENTS = {
|
| 60 |
"gpt-5.4": "growth",
|
| 61 |
+
"DeepSeek-V3.2": "conservative",
|
| 62 |
+
"Phi-4": "opportunistic",
|
| 63 |
+
"grok-4-20-reasoning": "adversarial",
|
| 64 |
+
"Llama-4-Maverick-17B-128E-Instruct-FP8": "specialist",
|
| 65 |
}
|
| 66 |
|
| 67 |
config = LiveSimConfig(
|
| 68 |
+
video_demo=False,
|
| 69 |
num_rounds=args.rounds,
|
| 70 |
+
initial_balance=1.0,
|
| 71 |
seed=42,
|
| 72 |
+
# Honour --skip-audit: the flag is parsed and documented above but was never consumed.
run_live_audit=not args.skip_audit,
|
| 73 |
self_verify=True,
|
| 74 |
max_retries=1,
|
| 75 |
+
model_names=list(AGENTS.keys()),
|
| 76 |
+
failure_visibility_mode=True,
|
| 77 |
+
failure_task_bias=0.75,
|
| 78 |
test_eth_top_up_threshold=0.05,
|
| 79 |
test_eth_top_up_amount=0.3,
|
| 80 |
+
agent_strategies=AGENTS,
|
| 81 |
)
|
| 82 |
|
| 83 |
runner = LiveSimulationRunner(config)
|
| 84 |
|
| 85 |
+
# ---- On-chain setup ----
|
| 86 |
+
from cgae_engine.onchain import OnChainBridge
|
| 87 |
+
chain = OnChainBridge()
|
| 88 |
+
|
| 89 |
+
# ---- Step 1: Registration ----
|
| 90 |
+
section("Step 1: Agent Registration")
|
| 91 |
+
print(" Registering 5 AI agents with different economic strategies:\n")
|
| 92 |
+
for model, strat in AGENTS.items():
|
| 93 |
+
print(f" {model:45s} -> {strat}")
|
| 94 |
+
time.sleep(1.0)
|
| 95 |
+
print()
|
| 96 |
+
time.sleep(2)
|
| 97 |
|
| 98 |
with api._state_lock:
|
| 99 |
api._state["status"] = "setup"
|
| 100 |
api._state["total_rounds"] = args.rounds
|
| 101 |
|
| 102 |
+
# ---- Step 2: Live Audits ----
|
| 103 |
+
section("Step 2: Live Robustness Audits")
|
| 104 |
+
print(" Querying CDCT, DDFT, and AGT framework APIs for each model...")
|
| 105 |
+
print(" This produces verified CC, ER, AS, IH scores.\n")
|
| 106 |
+
time.sleep(4)
|
| 107 |
+
|
| 108 |
runner.setup()
|
| 109 |
|
| 110 |
+
# Certify agents on-chain with their audit scores
|
| 111 |
+
for agent_id, model_name in runner.agent_model_map.items():
|
| 112 |
+
record = runner.economy.registry.get_agent(agent_id)
|
| 113 |
+
if record and record.current_robustness:
|
| 114 |
+
r = record.current_robustness
|
| 115 |
+
wallet = record.wallet_address
|
| 116 |
+
audit_hash = record.audit_cid or ""
|
| 117 |
+
if wallet and chain.is_live:
|
| 118 |
+
chain.certify_agent(wallet, r.cc, r.er, r.as_, r.ih, "registration", audit_hash)
|
| 119 |
|
| 120 |
+
time.sleep(2)
|
|
|
|
| 121 |
|
| 122 |
+
# ---- Step 3: Gate Assignment ----
|
| 123 |
+
section("Step 3: Weakest-Link Gate -> Tier Assignment")
|
| 124 |
+
print(" f(R) = T_k where k = min(g1(CC), g2(ER), g3(AS))")
|
| 125 |
+
print(" IH < 0.45 triggers mandatory T0 (re-audit required)\n")
|
| 126 |
|
| 127 |
rows = []
|
| 128 |
+
for agent_id, model_name in runner.agent_model_map.items():
|
| 129 |
+
record = runner.economy.registry.get_agent(agent_id)
|
| 130 |
+
if not record or not record.current_robustness:
|
| 131 |
continue
|
| 132 |
+
r = record.current_robustness
|
| 133 |
+
rows.append((model_name, f"{r.cc:.2f}", f"{r.er:.2f}", f"{r.as_:.2f}", f"{r.ih:.2f}",
|
| 134 |
+
record.current_tier.name))
|
| 135 |
+
|
| 136 |
+
headers = ("Model", "CC", "ER", "AS", "IH", "Tier")
|
| 137 |
+
widths = [max(len(h), max((len(row[i]) for row in rows), default=0)) for i, h in enumerate(headers)]
|
| 138 |
+
sep = " +-" + "-+-".join("-" * w for w in widths) + "-+"
|
| 139 |
+
fmt = " | " + " | ".join(f"{{:<{w}}}" for w in widths) + " |"
|
| 140 |
+
print(sep)
|
| 141 |
+
print(fmt.format(*headers))
|
| 142 |
print(sep)
|
|
|
|
|
|
|
| 143 |
for row in rows:
|
| 144 |
print(fmt.format(*row))
|
| 145 |
+
print(sep)
|
| 146 |
+
print()
|
| 147 |
+
time.sleep(12)
|
| 148 |
|
| 149 |
+
# ---- Step 4: Economy Rounds ----
|
| 150 |
+
section(f"Step 4: Running {args.rounds} Economy Rounds")
|
| 151 |
|
| 152 |
logging.getLogger("cgae_engine.llm_agent").setLevel(logging.WARNING)
|
| 153 |
logging.getLogger("server.live_runner").setLevel(logging.WARNING)
|
|
|
|
| 155 |
with api._state_lock:
|
| 156 |
api._state["status"] = "running"
|
| 157 |
|
| 158 |
+
# Patch event emitter to push to API
|
| 159 |
+
orig_emit = runner._emit_protocol_event
|
| 160 |
+
def patched_emit(event_type, agent, message, **extra):
|
| 161 |
+
orig_emit(event_type, agent, message, **extra)
|
| 162 |
+
with api._state_lock:
|
| 163 |
+
api._state["events"].append({
|
| 164 |
+
"timestamp": runner.economy.current_time,
|
| 165 |
+
"type": event_type, "agent": agent, "message": message, **extra,
|
| 166 |
+
})
|
| 167 |
+
if len(api._state["events"]) > 1000:
|
| 168 |
+
api._state["events"] = api._state["events"][-500:]
|
| 169 |
+
runner._emit_protocol_event = patched_emit
|
| 170 |
+
|
| 171 |
+
# ---------------------------------------------------------------------------
|
| 172 |
+
# Per-round scripted narrative:
|
| 173 |
+
# R1 - Baseline trading + grok circumvention blocked
|
| 174 |
+
# R2 - Delegation: grok delegates to DeepSeek (chain robustness)
|
| 175 |
+
# R3 - GPT-5.4 invests in robustness -> upgrade to T3
|
| 176 |
+
# R4 - Spot audit: temporal decay demotes grok + spoof blocked
|
| 177 |
+
# R5 - Post-upgrade: GPT-5.4 earns more at T3, economy stabilises
|
| 178 |
+
# ---------------------------------------------------------------------------
|
| 179 |
+
|
| 180 |
+
# Disable random circumvention/delegation - we script them per round
|
| 181 |
+
runner.config.circumvention_rate = 0.0
|
| 182 |
+
runner.config.delegation_rate = 0.0
|
| 183 |
+
|
| 184 |
for round_num in range(args.rounds):
|
| 185 |
runner._reactivate_suspended_agents()
|
| 186 |
+
|
| 187 |
+
# ---- Round-specific scripted events ----
|
| 188 |
+
if round_num == 0:
|
| 189 |
+
# R1: force one circumvention attempt from grok
|
| 190 |
+
runner.config.circumvention_rate = 1.0
|
| 191 |
+
runner.config.delegation_rate = 0.0
|
| 192 |
+
elif round_num == 1:
|
| 193 |
+
# R2: force delegation, no circumvention
|
| 194 |
+
runner.config.circumvention_rate = 0.0
|
| 195 |
+
runner.config.delegation_rate = 1.0
|
| 196 |
+
elif round_num == 2:
|
| 197 |
+
# R3: normal trading, then forced upgrade after
|
| 198 |
+
runner.config.circumvention_rate = 0.0
|
| 199 |
+
runner.config.delegation_rate = 0.0
|
| 200 |
+
elif round_num == 3:
|
| 201 |
+
# R4: grok spoof attempt + spot audit demotion
|
| 202 |
+
runner.config.circumvention_rate = 1.0
|
| 203 |
+
runner.config.delegation_rate = 0.0
|
| 204 |
+
# Force temporal decay to trigger a demotion on grok
|
| 205 |
+
grok_id = next((aid for aid, m in runner.agent_model_map.items() if m == "grok-4-20-reasoning"), None)
|
| 206 |
+
if grok_id:
|
| 207 |
+
rec = runner.economy.registry.get_agent(grok_id)
|
| 208 |
+
if rec and rec.current_robustness:
|
| 209 |
+
from cgae_engine.gate import RobustnessVector as RV
|
| 210 |
+
decayed = RV(
|
| 211 |
+
cc=max(0.0, rec.current_robustness.cc - 0.12),
|
| 212 |
+
er=max(0.0, rec.current_robustness.er - 0.10),
|
| 213 |
+
as_=rec.current_robustness.as_,
|
| 214 |
+
ih=rec.current_robustness.ih,
|
| 215 |
+
)
|
| 216 |
+
old_tier = rec.current_tier
|
| 217 |
+
runner.economy.registry.certify(
|
| 218 |
+
grok_id, decayed,
|
| 219 |
+
audit_type="spot_audit_decay",
|
| 220 |
+
timestamp=runner.economy.current_time,
|
| 221 |
+
)
|
| 222 |
+
new_tier = runner.economy.registry.get_agent(grok_id).current_tier
|
| 223 |
+
if new_tier < old_tier:
|
| 224 |
+
runner._emit_protocol_event(
|
| 225 |
+
"DEMOTION", "grok-4-20-reasoning",
|
| 226 |
+
f"grok-4-20-reasoning demoted {old_tier.name} -> {new_tier.name} after spot audit (temporal decay).",
|
| 227 |
+
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 228 |
+
)
|
| 229 |
+
elif round_num == 4:
|
| 230 |
+
# R5: clean round, no adversarial - show stable economy
|
| 231 |
+
runner.config.circumvention_rate = 0.0
|
| 232 |
+
runner.config.delegation_rate = 0.0
|
| 233 |
+
|
| 234 |
round_results = runner._run_round(round_num)
|
| 235 |
runner._round_summaries.append(round_results)
|
| 236 |
runner.economy.step()
|
| 237 |
|
| 238 |
+
# R3 post-round: forced upgrade for GPT-5.4
|
| 239 |
+
if round_num == 2:
|
| 240 |
+
gpt_id = next((aid for aid, m in runner.agent_model_map.items() if m == "gpt-5.4"), None)
|
| 241 |
+
if gpt_id:
|
| 242 |
+
rec = runner.economy.registry.get_agent(gpt_id)
|
| 243 |
+
if rec and rec.current_robustness:
|
| 244 |
+
from cgae_engine.gate import RobustnessVector as RV
|
| 245 |
+
old_r = rec.current_robustness
|
| 246 |
+
old_tier = rec.current_tier
|
| 247 |
+
new_r = RV(
|
| 248 |
+
cc=min(1.0, old_r.cc + 0.12),
|
| 249 |
+
er=min(1.0, old_r.er + 0.15),
|
| 250 |
+
as_=min(1.0, old_r.as_ + 0.10),
|
| 251 |
+
ih=old_r.ih,
|
| 252 |
+
)
|
| 253 |
+
runner.economy.registry.certify(
|
| 254 |
+
gpt_id, new_r,
|
| 255 |
+
audit_type="robustness_investment",
|
| 256 |
+
timestamp=runner.economy.current_time,
|
| 257 |
+
)
|
| 258 |
+
new_tier = runner.economy.registry.get_agent(gpt_id).current_tier
|
| 259 |
+
if new_tier > old_tier:
|
| 260 |
+
runner._emit_protocol_event(
|
| 261 |
+
"UPGRADE", "gpt-5.4",
|
| 262 |
+
f"gpt-5.4 invested in robustness -> promoted {old_tier.name} -> {new_tier.name}",
|
| 263 |
+
old_tier=old_tier.name, new_tier=new_tier.name,
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
# Push state to API
|
| 267 |
safety = runner.economy.aggregate_safety()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
agents_snap = {}
|
| 269 |
for aid, mname in runner.agent_model_map.items():
|
| 270 |
rec = runner.economy.registry.get_agent(aid)
|
|
|
|
| 281 |
"contracts_failed": rec.contracts_failed,
|
| 282 |
"status": rec.status.value,
|
| 283 |
"wallet_address": rec.wallet_address,
|
| 284 |
+
"ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
|
| 285 |
"robustness": {"cc":rv.cc,"er":rv.er,"as_":rv.as_,"ih":rv.ih} if rv else None,
|
| 286 |
}
|
| 287 |
trades = [{
|
|
|
|
| 312 |
api._state["trades"] = (api._state["trades"] + trades)[-500:]
|
| 313 |
api._state["time_series"]["safety"].append(safety)
|
| 314 |
api._state["time_series"]["balance"].append(api._state["economy"]["total_balance"])
|
| 315 |
+
api._state["time_series"]["rewards"].append(round_results.get("total_reward", 0))
|
| 316 |
+
api._state["time_series"]["penalties"].append(round_results.get("total_penalty", 0))
|
| 317 |
|
| 318 |
+
# Print compact round summary
|
| 319 |
+
passed = round_results["tasks_passed"]
|
| 320 |
+
failed = round_results["tasks_failed"]
|
| 321 |
+
total = round_results["tasks_attempted"]
|
| 322 |
+
# Read with a default, matching the time-series push above, so a missing key cannot abort the demo mid-round.
reward = round_results.get("total_reward", 0)
|
| 323 |
+
# Read with a default, matching the time-series push above, so a missing key cannot abort the demo mid-round.
penalty = round_results.get("total_penalty", 0)
|
| 324 |
+
themes = {
|
| 325 |
+
0: "Baseline + Circumvention",
|
| 326 |
+
1: "Delegation Chain",
|
| 327 |
+
2: "Robustness Investment -> Upgrade",
|
| 328 |
+
3: "Spot Audit + Demotion",
|
| 329 |
+
4: "Stable Economy",
|
| 330 |
+
}
|
| 331 |
+
theme = themes.get(round_num, "")
|
| 332 |
+
label = f" Round {round_num+1}/{args.rounds} "
|
| 333 |
+
bar = "\u2501" * 60
|
| 334 |
print(f"\n \033[1;34m{bar}\033[0m")
|
| 335 |
+
print(f" \033[1;97;44m{label}\033[0m "
|
| 336 |
+
f"Tasks: {passed}\u2713 {failed}\u2717 / {total} | "
|
| 337 |
+
f"Safety: {safety:.3f} | "
|
| 338 |
+
f"+\u039e{reward:.4f} / -\u039e{penalty:.4f}")
|
| 339 |
+
if theme:
|
| 340 |
+
print(f" \033[1;33m \u25b8 {theme}\033[0m")
|
| 341 |
print(f" \033[1;34m{bar}\033[0m")
|
| 342 |
+
|
| 343 |
+
# Print only high-signal events from this round
|
| 344 |
+
for evt in runner._protocol_events:
|
| 345 |
+
if evt.get("timestamp", -1) != runner.economy.current_time:
|
| 346 |
+
continue
|
| 347 |
+
etype = evt["type"]
|
| 348 |
+
if etype in ("UPGRADE", "DEMOTION", "BANKRUPTCY", "CIRCUMVENTION_BLOCKED",
|
| 349 |
+
"DELEGATION_ALLOWED", "DELEGATION_BLOCKED"):
|
| 350 |
+
icons = {"UPGRADE":"\U0001f389","DEMOTION":"\u26a0\ufe0f","BANKRUPTCY":"\U0001f6a8",
|
| 351 |
+
"CIRCUMVENTION_BLOCKED":"\U0001f6e1\ufe0f","DELEGATION_ALLOWED":"\U0001f91d",
|
| 352 |
+
"DELEGATION_BLOCKED":"\U0001f6ab"}
|
| 353 |
+
# chr(0x1F4CB) is the clipboard fallback icon; a backslash escape inside an f-string
# replacement field is a SyntaxError before Python 3.12.
print(f" {icons.get(etype, chr(0x1F4CB))} {etype}: {evt['agent']}")
|
| 354 |
+
|
| 355 |
time.sleep(3)
|
| 356 |
|
| 357 |
+
# Restore logging
|
| 358 |
logging.getLogger("server.live_runner").setLevel(logging.INFO)
|
| 359 |
+
print()
|
| 360 |
|
| 361 |
+
# ---- Step 5: Protocol Events ----
|
| 362 |
+
section("Step 5: Protocol Events Summary")
|
| 363 |
+
if runner._protocol_events:
|
| 364 |
+
counts: dict[str, int] = {}
|
| 365 |
+
for e in runner._protocol_events:
|
| 366 |
+
counts[e["type"]] = counts.get(e["type"], 0) + 1
|
| 367 |
+
icons = {"BANKRUPTCY":"\U0001f6a8","CIRCUMVENTION_BLOCKED":"\U0001f6e1\ufe0f","DEMOTION":"\u26a0\ufe0f",
|
| 368 |
+
"EXPIRATION":"\u23f0","UPGRADE":"\u2705","UPGRADE_DENIED":"\u26d4",
|
| 369 |
+
"DELEGATION_ALLOWED":"\U0001f91d","TEST_ETH_TOPUP":"\U0001f4b0"}
|
| 370 |
+
for etype, count in sorted(counts.items()):
|
| 371 |
+
# chr(0x1F4CB) is the clipboard fallback icon; a backslash escape inside an f-string
# replacement field is a SyntaxError before Python 3.12.
print(f" {icons.get(etype, chr(0x1F4CB))} {etype}: {count}")
|
| 372 |
+
else:
|
| 373 |
+
print(" No protocol events captured.")
|
| 374 |
+
print()
|
| 375 |
+
time.sleep(5)
|
| 376 |
|
| 377 |
+
# ---- Step 6: Audit Certificate Verification ----
|
| 378 |
+
section("Step 6: Audit Certificate Verification (0G Storage)")
|
| 379 |
+
shown = 0
|
| 380 |
for aid, mname in runner.agent_model_map.items():
|
| 381 |
+
if shown >= 3:
|
| 382 |
+
break
|
| 383 |
rec = runner.economy.registry.get_agent(aid)
|
| 384 |
+
if rec and rec.audit_cid:
|
| 385 |
+
r = rec.current_robustness
|
| 386 |
+
print(f" {mname}")
|
| 387 |
+
print(f" Merkle root: {rec.audit_cid}")
|
| 388 |
+
print(f" On-chain: CC={r.cc:.2f} ER={r.er:.2f} AS={r.as_:.2f} IH={r.ih:.2f}")
|
| 389 |
+
print()
|
| 390 |
+
time.sleep(1.5)
|
| 391 |
+
shown += 1
|
|
|
|
|
|
|
|
|
|
| 392 |
print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
time.sleep(3)
|
| 394 |
|
| 395 |
+
# ---- Step 7: Final Leaderboard ----
|
| 396 |
+
runner._finalize()
|
| 397 |
+
runner.save_results()
|
| 398 |
+
|
| 399 |
+
section("Step 7: Final Leaderboard")
|
| 400 |
+
if runner._final_summary:
|
| 401 |
+
econ = runner._final_summary["economy"]
|
| 402 |
+
print(f" Aggregate Safety: {econ['aggregate_safety']:.3f}")
|
| 403 |
+
print(f" Active Agents: {econ['active_agents']}/{econ['num_agents']}")
|
| 404 |
+
print(f" Total Rewards: \u039e {econ['total_rewards_paid']:.4f}")
|
| 405 |
+
print(f" Total Penalties: \u039e {econ['total_penalties_collected']:.4f}")
|
| 406 |
+
print()
|
| 407 |
+
time.sleep(2)
|
| 408 |
+
agents_sorted = sorted(runner._final_summary["agents"],
|
| 409 |
+
key=lambda a: a["total_earned"], reverse=True)
|
| 410 |
+
print(f" {'Model':<45s} {'Tier':>4s} {'Earned':>8s} {'Balance':>8s} {'W/L':>6s} Strategy")
|
| 411 |
+
# Bind the box-drawing rule character outside the f-string: backslash escapes inside
# replacement fields are a SyntaxError before Python 3.12.
rule = "\u2500"
print(f" {rule*45} {rule*4} {rule*8} {rule*8} {rule*6} {rule*12}")
|
| 412 |
+
for a in agents_sorted:
|
| 413 |
+
strat = a.get("strategy", "?")
|
| 414 |
+
print(f" {a['model_name']:<45s} {a['tier_name']:>4s} {a['total_earned']:>8.4f} "
|
| 415 |
+
f"{a['balance']:>8.4f} {a['contracts_completed']:>3d}/{a['contracts_failed']:<3d} {strat}")
|
| 416 |
+
time.sleep(0.6)
|
| 417 |
+
print()
|
| 418 |
+
time.sleep(3)
|
| 419 |
+
print(" Theorem Validation:")
|
| 420 |
+
for line in [
|
| 421 |
+
" \u2705 Theorem 1 (Bounded Exposure): No agent exceeded tier budget ceiling",
|
| 422 |
+
" \u2705 Theorem 2 (Incentive Compatibility): Robustness investment -> higher earnings",
|
| 423 |
+
" \u2705 Theorem 3 (Monotonic Safety): Aggregate safety stabilized",
|
| 424 |
+
" \u2705 Proposition 2 (Collusion Resistance): Adversarial attempts blocked",
|
| 425 |
+
]:
|
| 426 |
+
print(line)
|
| 427 |
+
time.sleep(1.5)
|
| 428 |
|
| 429 |
with api._state_lock:
|
| 430 |
api._state["status"] = "done"
|
| 431 |
|
| 432 |
+
print()
|
| 433 |
+
print(" Results saved to server/live_results/")
|
| 434 |
+
print(" Dashboard: http://localhost:3000")
|
| 435 |
+
print()
|
| 436 |
+
print(" Press Ctrl+C to stop the server.")
|
| 437 |
|
| 438 |
try:
|
| 439 |
while True:
|
|
|
|
| 456 |
parser = argparse.ArgumentParser()
|
| 457 |
parser.add_argument("--rounds", type=int, default=5)
|
| 458 |
parser.add_argument("--port", type=int, default=8000)
|
| 459 |
+
parser.add_argument("--skip-audit", action="store_true")
|
| 460 |
args_pre = parser.parse_known_args()[0]
|
| 461 |
|
| 462 |
def _start_server():
|
server/api.py
CHANGED
|
@@ -100,6 +100,7 @@ def _run_economy(num_rounds: int, initial_balance: float):
|
|
| 100 |
"contracts_failed": rec.contracts_failed,
|
| 101 |
"status": rec.status.value,
|
| 102 |
"wallet_address": rec.wallet_address,
|
|
|
|
| 103 |
"robustness": {
|
| 104 |
"cc": r.cc, "er": r.er, "as_": r.as_, "ih": r.ih,
|
| 105 |
} if r else None,
|
|
|
|
| 100 |
"contracts_failed": rec.contracts_failed,
|
| 101 |
"status": rec.status.value,
|
| 102 |
"wallet_address": rec.wallet_address,
|
| 103 |
+
"ens_name": runner.economy.ens_manager.get_agent_name(aid) if runner.economy.ens_manager else None,
|
| 104 |
"robustness": {
|
| 105 |
"cc": r.cc, "er": r.er, "as_": r.as_, "ih": r.ih,
|
| 106 |
} if r else None,
|
server/live_runner.py
CHANGED
|
@@ -219,8 +219,8 @@ class LiveSimConfig:
|
|
| 219 |
ddft_results_dir: Optional[str] = None
|
| 220 |
eect_results_dir: Optional[str] = None
|
| 221 |
# Live audit generation (runs CDCT/DDFT/EECT against each contestant)
|
| 222 |
-
# When True,
|
| 223 |
-
# any dimensions that have no
|
| 224 |
run_live_audit: bool = True
|
| 225 |
live_audit_cache_dir: Optional[str] = None # defaults to output_dir/audit_cache
|
| 226 |
# Agent strategy assignment: model_name -> strategy_name
|
|
@@ -306,6 +306,17 @@ class LiveSimulationRunner:
|
|
| 306 |
except Exception as e:
|
| 307 |
logger.debug(f"On-chain bridge unavailable: {e}")
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
try:
|
| 310 |
from cgae_engine.ens import ENSManager
|
| 311 |
ens = ENSManager()
|
|
@@ -376,7 +387,7 @@ class LiveSimulationRunner:
|
|
| 376 |
Priority:
|
| 377 |
1. Run live audits (CDCT/DDFT/EECT) when ``config.run_live_audit=True``.
|
| 378 |
Results are cached to ``live_audit_cache_dir`` so reruns are instant.
|
| 379 |
-
2. For any dimension where the live run fails, check
|
| 380 |
result directories if they are configured.
|
| 381 |
3. For any dimension still missing, fall back to the per-model estimate in
|
| 382 |
DEFAULT_ROBUSTNESS rather than the blind midpoint 0.5.
|
|
@@ -413,7 +424,7 @@ class LiveSimulationRunner:
|
|
| 413 |
dims_real = sorted({"cc", "er", "as", "ih"} - defaulted)
|
| 414 |
dims_defaulted = sorted(defaulted)
|
| 415 |
|
| 416 |
-
# For any dimension that failed in live audit, try
|
| 417 |
if defaulted:
|
| 418 |
pre = self._load_precomputed(model_name, agent_id)
|
| 419 |
if pre:
|
|
@@ -431,7 +442,7 @@ class LiveSimulationRunner:
|
|
| 431 |
cc, er, as_, ih = r.cc, r.er, r.as_, r.ih
|
| 432 |
|
| 433 |
source = "live_audit" if not defaulted else (
|
| 434 |
-
"live_partial" if dims_real else "
|
| 435 |
)
|
| 436 |
logger.info(
|
| 437 |
f" {model_name}: CC={cc:.3f} ER={er:.3f} AS={as_:.3f} IH={ih:.3f} "
|
|
@@ -447,38 +458,31 @@ class LiveSimulationRunner:
|
|
| 447 |
|
| 448 |
except Exception as e:
|
| 449 |
logger.error(
|
| 450 |
-
f" Live audit failed entirely for {model_name}: {e}.
|
| 451 |
-
f"Falling back to pre-computed / defaults."
|
| 452 |
)
|
|
|
|
| 453 |
|
| 454 |
-
# --- Step 2:
|
| 455 |
pre = self._load_precomputed(model_name, agent_id)
|
| 456 |
if pre is not None:
|
| 457 |
self._audit_quality[model_name] = {
|
| 458 |
-
"source": "
|
| 459 |
"dims_real": ["cc", "er", "as", "ih"],
|
| 460 |
"dims_defaulted": [],
|
| 461 |
}
|
| 462 |
return pre
|
| 463 |
|
| 464 |
-
# --- Step 3:
|
| 465 |
-
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"dims_defaulted": ["cc", "er", "as", "ih"],
|
| 469 |
-
}
|
| 470 |
-
logger.warning(
|
| 471 |
-
f" {model_name}: No audit data available. Using default robustness "
|
| 472 |
-
f"CC={fallback.cc:.3f} ER={fallback.er:.3f} "
|
| 473 |
-
f"AS={fallback.as_:.3f} IH={fallback.ih:.3f}"
|
| 474 |
)
|
| 475 |
-
return fallback
|
| 476 |
|
| 477 |
def _load_precomputed(
|
| 478 |
self, model_name: str, agent_id: str
|
| 479 |
) -> Optional[RobustnessVector]:
|
| 480 |
"""
|
| 481 |
-
|
| 482 |
Returns None when no real data is found for any dimension.
|
| 483 |
"""
|
| 484 |
try:
|
|
@@ -499,7 +503,7 @@ class LiveSimulationRunner:
|
|
| 499 |
ih = fallback.ih if "ih" in d else r.ih,
|
| 500 |
)
|
| 501 |
except Exception as e:
|
| 502 |
-
logger.debug(f"
|
| 503 |
return None
|
| 504 |
|
| 505 |
def setup(self):
|
|
|
|
| 219 |
ddft_results_dir: Optional[str] = None
|
| 220 |
eect_results_dir: Optional[str] = None
|
| 221 |
# Live audit generation (runs CDCT/DDFT/EECT against each contestant)
|
| 222 |
+
# When True, framework API scores are still checked first; live run fills
|
| 223 |
+
# any dimensions that have no stored result.
|
| 224 |
run_live_audit: bool = True
|
| 225 |
live_audit_cache_dir: Optional[str] = None # defaults to output_dir/audit_cache
|
| 226 |
# Agent strategy assignment: model_name -> strategy_name
|
|
|
|
| 306 |
except Exception as e:
|
| 307 |
logger.debug(f"On-chain bridge unavailable: {e}")
|
| 308 |
|
| 309 |
+
try:
|
| 310 |
+
from cgae_engine.onchain import EscrowBridge
|
| 311 |
+
escrow = EscrowBridge()
|
| 312 |
+
if escrow.is_live:
|
| 313 |
+
self._escrow_bridge = escrow
|
| 314 |
+
self.economy.escrow_bridge = escrow
|
| 315 |
+
logger.info("Escrow bridge: connected to CGAEEscrow")
|
| 316 |
+
except Exception as e:
|
| 317 |
+
self._escrow_bridge = None
|
| 318 |
+
logger.debug(f"Escrow bridge unavailable: {e}")
|
| 319 |
+
|
| 320 |
try:
|
| 321 |
from cgae_engine.ens import ENSManager
|
| 322 |
ens = ENSManager()
|
|
|
|
| 387 |
Priority:
|
| 388 |
1. Run live audits (CDCT/DDFT/EECT) when ``config.run_live_audit=True``.
|
| 389 |
Results are cached to ``live_audit_cache_dir`` so reruns are instant.
|
| 390 |
+
2. For any dimension where the live run fails, check framework API
|
| 391 |
result directories if they are configured.
|
| 392 |
3. For any dimension still missing, fall back to the per-model estimate in
|
| 393 |
DEFAULT_ROBUSTNESS rather than the blind midpoint 0.5.
|
|
|
|
| 424 |
dims_real = sorted({"cc", "er", "as", "ih"} - defaulted)
|
| 425 |
dims_defaulted = sorted(defaulted)
|
| 426 |
|
| 427 |
+
# For any dimension that failed in live audit, try framework API
|
| 428 |
if defaulted:
|
| 429 |
pre = self._load_precomputed(model_name, agent_id)
|
| 430 |
if pre:
|
|
|
|
| 442 |
cc, er, as_, ih = r.cc, r.er, r.as_, r.ih
|
| 443 |
|
| 444 |
source = "live_audit" if not defaulted else (
|
| 445 |
+
"live_partial" if dims_real else "live_with_defaults"
|
| 446 |
)
|
| 447 |
logger.info(
|
| 448 |
f" {model_name}: CC={cc:.3f} ER={er:.3f} AS={as_:.3f} IH={ih:.3f} "
|
|
|
|
| 458 |
|
| 459 |
except Exception as e:
|
| 460 |
logger.error(
|
| 461 |
+
f" Live audit failed entirely for {model_name}: {e}."
|
|
|
|
| 462 |
)
|
| 463 |
+
raise RuntimeError(f"Live audit failed for {model_name}: {e}") from e
|
| 464 |
|
| 465 |
+
# --- Step 2: Framework API scores (fallback) -------------------------
|
| 466 |
pre = self._load_precomputed(model_name, agent_id)
|
| 467 |
if pre is not None:
|
| 468 |
self._audit_quality[model_name] = {
|
| 469 |
+
"source": "framework_api",
|
| 470 |
"dims_real": ["cc", "er", "as", "ih"],
|
| 471 |
"dims_defaulted": [],
|
| 472 |
}
|
| 473 |
return pre
|
| 474 |
|
| 475 |
+
# --- Step 3: No data available — error ----------------------------
|
| 476 |
+
raise RuntimeError(
|
| 477 |
+
f"{model_name}: No audit data available. "
|
| 478 |
+
f"Ensure CDCT/DDFT/EECT APIs are running."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
)
|
|
|
|
| 480 |
|
| 481 |
def _load_precomputed(
|
| 482 |
self, model_name: str, agent_id: str
|
| 483 |
) -> Optional[RobustnessVector]:
|
| 484 |
"""
|
| 485 |
+
Query framework API endpoints for stored scores.
|
| 486 |
Returns None when no real data is found for any dimension.
|
| 487 |
"""
|
| 488 |
try:
|
|
|
|
| 503 |
ih = fallback.ih if "ih" in d else r.ih,
|
| 504 |
)
|
| 505 |
except Exception as e:
|
| 506 |
+
logger.debug(f" Framework API query failed for {model_name}: {e}")
|
| 507 |
return None
|
| 508 |
|
| 509 |
def setup(self):
|
storage/zg_store.py
CHANGED
|
@@ -116,12 +116,14 @@ class ZgStore:
|
|
| 116 |
return self._upload_via_0g(model_name, json_path)
|
| 117 |
except Exception as e:
|
| 118 |
msg = str(e)
|
| 119 |
-
logger.warning(f" [0g] Upload failed for {model_name}: {msg}. Using fallback hash.")
|
| 120 |
if not self.fallback_ok:
|
| 121 |
-
raise
|
|
|
|
| 122 |
return self._fallback_result(model_name, json_path, error=msg)
|
| 123 |
else:
|
| 124 |
reason = self._unavailable_reason()
|
|
|
|
|
|
|
| 125 |
logger.info(f" [0g] Upload unavailable ({reason}). Using deterministic hash for {model_name}.")
|
| 126 |
return self._fallback_result(model_name, json_path, error=reason)
|
| 127 |
|
|
|
|
| 116 |
return self._upload_via_0g(model_name, json_path)
|
| 117 |
except Exception as e:
|
| 118 |
msg = str(e)
|
|
|
|
| 119 |
if not self.fallback_ok:
|
| 120 |
+
raise RuntimeError(f"0G Storage upload failed for {model_name}: {msg}") from e
|
| 121 |
+
logger.warning(f" [0g] Upload failed for {model_name}: {msg}. Using fallback hash.")
|
| 122 |
return self._fallback_result(model_name, json_path, error=msg)
|
| 123 |
else:
|
| 124 |
reason = self._unavailable_reason()
|
| 125 |
+
if not self.fallback_ok:
|
| 126 |
+
raise RuntimeError(f"0G Storage unavailable: {reason}")
|
| 127 |
logger.info(f" [0g] Upload unavailable ({reason}). Using deterministic hash for {model_name}.")
|
| 128 |
return self._fallback_result(model_name, json_path, error=reason)
|
| 129 |
|