Rohan03 committed on
Commit
f0bf034
·
verified ·
1 Parent(s): ce80011

V2 merge: purpose_agent/v2_types.py

Browse files
Files changed (1) hide show
  1. purpose_agent/v2_types.py +101 -0
purpose_agent/v2_types.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ V2 Core Types — RunMode, extended PurposeScore, memory scoping.
3
+
4
+ Backward compatible: all V1 types remain unchanged. V2 additions are new
5
+ classes or optional fields on existing ones.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from enum import Enum
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+
14
class RunMode(Enum):
    """
    Controls what the framework is allowed to mutate during a run.

    learning_train: Full read/write. Memories can be created, Q-values updated,
        heuristics distilled. This is where the agent learns.

    learning_validation: Read existing memories, but writes go to a staging area.
        Used to validate whether new memories actually help before promoting.

    eval_test: Pure evaluation. NO memory writes, NO heuristic promotion,
        NO rubric mutation. The agent runs with whatever it has learned.
        This is the only mode whose numbers you can trust.
    """

    LEARNING_TRAIN = "learning_train"
    LEARNING_VALIDATION = "learning_validation"
    EVAL_TEST = "eval_test"

    @property
    def allows_memory_write(self) -> bool:
        """Return True only when this mode is LEARNING_TRAIN."""
        # Enum members are singletons, so identity is the exact comparison.
        return self is RunMode.LEARNING_TRAIN

    @property
    def allows_staging_write(self) -> bool:
        """Return True for LEARNING_TRAIN and LEARNING_VALIDATION, False otherwise."""
        return self in (RunMode.LEARNING_TRAIN, RunMode.LEARNING_VALIDATION)

    @property
    def is_eval(self) -> bool:
        """Return True only when this mode is EVAL_TEST."""
        return self is RunMode.EVAL_TEST
43
+
44
+
45
@dataclass
class MemoryScope:
    """
    Scoping metadata for a memory. Determines which contexts a memory is
    eligible to be retrieved in.

    An empty list (or empty ``user_id``) places no constraint on that
    dimension.
    """

    agent_roles: list[str] = field(default_factory=list)      # e.g. ["coder", "tester"]
    tool_names: list[str] = field(default_factory=list)       # e.g. ["python_exec"]
    task_categories: list[str] = field(default_factory=list)  # e.g. ["coding", "debugging"]
    team_protocols: list[str] = field(default_factory=list)   # e.g. ["code_review_pipeline"]
    user_id: str = ""                                         # scoped to a specific user

    def matches(self, query_scope: "MemoryScope") -> bool:
        """
        Check if this scope overlaps with a query scope. Empty = matches all.

        A dimension is only compared when BOTH scopes specify it. For the
        list-valued dimensions the two lists must share at least one element;
        for ``user_id`` the two strings must be equal.

        Args:
            query_scope: The scope to compare against.

        Returns:
            False if any dimension specified on both sides fails to overlap,
            True otherwise.
        """
        # Every list-valued dimension follows the same rule, including
        # team_protocols, so they are handled uniformly.
        list_dimensions = (
            "agent_roles",
            "tool_names",
            "task_categories",
            "team_protocols",
        )
        for dimension in list_dimensions:
            own_values = getattr(self, dimension)
            query_values = getattr(query_scope, dimension)
            # An empty list on either side means "unconstrained": skip it.
            if own_values and query_values and set(own_values).isdisjoint(query_values):
                return False

        # user_id is a scalar: when both are set they must be identical.
        if self.user_id and query_scope.user_id and self.user_id != query_scope.user_id:
            return False
        return True
72
+
73
+
74
@dataclass
class PurposeScoreV2:
    """
    Extended PurposeScore with evidence tracking and hack detection.

    Backward compatible: original PurposeScore fields are preserved.
    V2 additions are evidence_ids, components, rubric_version, hack_flags.
    """

    # Required fields (no defaults), so they must be supplied at construction.
    phi_before: float
    phi_after: float
    delta: float
    reasoning: str
    evidence: str
    confidence: float

    # V2 additions: all optional; mutable defaults are created per instance.
    evidence_ids: list[str] = field(default_factory=list)
    components: dict[str, float] = field(default_factory=dict)
    rubric_version: str = "v1"
    hack_flags: list[str] = field(default_factory=list)

    @property
    def improved(self) -> bool:
        """Return True when ``delta`` is strictly greater than zero."""
        return self.delta > 0.0

    @property
    def is_suspicious(self) -> bool:
        """Return True when at least one hack flag is recorded."""
        return bool(self.hack_flags)