soumi guria commited on
Commit
60fc766
·
1 Parent(s): 876d182

changes after round 1

Browse files
backend/main.py CHANGED
@@ -50,59 +50,86 @@ class CLMObservation(OEObservation):
50
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
51
  visible_state: Dict[str, Any] = Field(default_factory=dict)
52
  time_step: int = Field(default=0)
 
 
 
53
  model_config = {"extra": "allow"}
54
 
55
  class CLMState(OEState):
56
- energy: float = Field(default=1.0)
57
- stress: float = Field(default=0.0)
58
- fatigue: float = Field(default=0.0)
59
  focus_mode: bool = Field(default=False)
60
- current_task_id: Optional[str] = Field(default=None)
61
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
62
  model_config = {"extra": "allow"}
63
 
64
 
65
  class CLMEnvWrapper(Environment):
66
  SUPPORTS_CONCURRENT_SESSIONS = True
 
 
67
 
68
  def __init__(self):
69
  super().__init__()
70
- self._env = CLMEnvironment(tasks=generate_tasks("easy"), max_steps=50)
 
71
  self._final_score: float = _SCORE_MIN
 
 
 
 
 
 
 
 
72
 
73
  def _to_oe_obs(self, obs: ModelObservation, done=False,
74
  reward=None, info=None) -> CLMObservation:
75
  return CLMObservation(
76
  tasks=[t.model_dump() for t in obs.tasks],
77
  visible_state=obs.visible_state.model_dump(),
78
- time_step=obs.time_step, done=done, reward=reward, metadata=info or {},
 
 
 
79
  )
80
 
81
  def reset(self, seed=None, episode_id=None, task_id: str = "easy", **kw) -> CLMObservation:
82
- if task_id not in ("easy", "medium", "hard", "expert"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  task_id = "easy"
84
  max_s = 60 if task_id == "expert" else 50
85
- self._env = CLMEnvironment(tasks=generate_tasks(task_id), max_steps=max_s)
86
  self._final_score = _SCORE_MIN
87
- return self._to_oe_obs(self._env.reset())
88
 
89
  def step(self, action: CLMAction, timeout_s=None, **kw) -> CLMObservation:
90
- ma = ModelAction(type=action.type, task_id=action.task_id)
91
  obs, reward, done, info = self._env.step(ma)
92
  if done:
93
  self._final_score = _safe(info.get("final_score",
94
  deterministic_grader(self._env.state.tasks,
95
  self._env.state.time_step, self._env.state.energy)))
96
  info["final_score"] = self._final_score
 
97
  return self._to_oe_obs(obs, done=done, reward=_safe(float(reward)), info=info)
98
 
99
  @property
100
  def state(self):
101
  raw = self._env.state_dict()
102
  return CLMState(
103
- energy=raw.get("energy", 1.0), stress=raw.get("stress", 0.0),
104
- fatigue=raw.get("fatigue", 0.0), focus_mode=raw.get("focus_mode", False),
105
- current_task_id=raw.get("current_task_id"),
106
  tasks=raw.get("tasks", []), step_count=raw.get("time_step", 0),
107
  )
108
 
 
50
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
51
  visible_state: Dict[str, Any] = Field(default_factory=dict)
52
  time_step: int = Field(default=0)
53
+ workers: List[Dict[str, Any]] = Field(default_factory=list)
54
+ schema_drift: Optional[Dict] = Field(default=None)
55
+ final_score: Optional[float] = Field(default=None)
56
  model_config = {"extra": "allow"}
57
 
58
  class CLMState(OEState):
59
+ workers: List[Dict[str, Any]] = Field(default_factory=list)
 
 
60
  focus_mode: bool = Field(default=False)
 
61
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
62
  model_config = {"extra": "allow"}
63
 
64
 
65
  class CLMEnvWrapper(Environment):
66
  SUPPORTS_CONCURRENT_SESSIONS = True
67
+ _agent_score_history: List[float] = []
68
+ _GLOBAL_ENV = None
69
 
70
  def __init__(self):
71
  super().__init__()
72
+ if CLMEnvWrapper._GLOBAL_ENV is None:
73
+ CLMEnvWrapper._GLOBAL_ENV = CLMEnvironment(tasks=generate_tasks("easy"), max_steps=50)
74
  self._final_score: float = _SCORE_MIN
75
+
76
+ @property
77
+ def _env(self):
78
+ return CLMEnvWrapper._GLOBAL_ENV
79
+
80
+ @_env.setter
81
+ def _env(self, value):
82
+ CLMEnvWrapper._GLOBAL_ENV = value
83
 
84
  def _to_oe_obs(self, obs: ModelObservation, done=False,
85
  reward=None, info=None) -> CLMObservation:
86
  return CLMObservation(
87
  tasks=[t.model_dump() for t in obs.tasks],
88
  visible_state=obs.visible_state.model_dump(),
89
+ time_step=obs.time_step, done=done, reward=reward,
90
+ workers=info.get("workers", []) if info else [],
91
+ schema_drift=info.get("schema_drift") if info else None,
92
+ final_score=info.get("final_score") if info else None
93
  )
94
 
95
  def reset(self, seed=None, episode_id=None, task_id: str = "easy", **kw) -> CLMObservation:
96
+ if task_id == "auto":
97
+ hist = self.__class__._agent_score_history
98
+ if len(hist) < 3:
99
+ task_id = "easy"
100
+ else:
101
+ recent_avg = sum(hist[-3:]) / 3.0
102
+ if recent_avg > 0.80:
103
+ task_id = "expert"
104
+ elif recent_avg > 0.60:
105
+ task_id = "hard"
106
+ elif recent_avg > 0.40:
107
+ task_id = "medium"
108
+ else:
109
+ task_id = "easy"
110
+ elif task_id not in ("easy", "medium", "hard", "expert"):
111
  task_id = "easy"
112
  max_s = 60 if task_id == "expert" else 50
113
+ self._env = CLMEnvironment(tasks=generate_tasks(task_id, seed=seed), max_steps=max_s)
114
  self._final_score = _SCORE_MIN
115
+ return self._to_oe_obs(self._env.reset(), info=self._env.state_dict())
116
 
117
  def step(self, action: CLMAction, timeout_s=None, **kw) -> CLMObservation:
118
+ ma = ModelAction(type=action.type, task_id=action.task_id, worker_id=getattr(action, "worker_id", "w1"))
119
  obs, reward, done, info = self._env.step(ma)
120
  if done:
121
  self._final_score = _safe(info.get("final_score",
122
  deterministic_grader(self._env.state.tasks,
123
  self._env.state.time_step, self._env.state.energy)))
124
  info["final_score"] = self._final_score
125
+ self.__class__._agent_score_history.append(self._final_score)
126
  return self._to_oe_obs(obs, done=done, reward=_safe(float(reward)), info=info)
127
 
128
  @property
129
  def state(self):
130
  raw = self._env.state_dict()
131
  return CLMState(
132
+ workers=raw.get("workers", []), focus_mode=raw.get("focus_mode", False),
 
 
133
  tasks=raw.get("tasks", []), step_count=raw.get("time_step", 0),
134
  )
135
 
frontend/dist/assets/index-C3o0olYq.js ADDED
The diff for this file is too large to render. See raw diff
 
frontend/dist/assets/index-CV2RR57m.css ADDED
@@ -0,0 +1 @@
 
 
1
+ *,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.collapse{visibility:collapse}.sticky{position:sticky}.top-0{top:0}.top-6{top:1.5rem}.z-10{z-index:10}.mx-auto{margin-left:auto;margin-right:auto}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.ml-4{margin-left:1rem}.ml-auto{margin-left:auto}.mr-2{margin-right:.5rem}.mt-1{margin-top:.25rem}.mt-10{margin-top:2.5rem}.mt-3{margin-top:.75rem}.block{display:block}.inline-block{display:inline-block}.flex{display:flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-2{height:.5rem}.h-3{height:.75rem}.h-\[calc\(100vh-6rem\)\]{height:calc(100vh - 6rem)}.min-h-screen{min-height:100vh}.w-full{width:100%}.max-w-7xl{max-width:80rem}.flex-1{flex:1 1 0%}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear infinite}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.flex-col{flex-direction:column}.items-start{align-items:flex-start}.items-center{align-items:center}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-2{gap:.5rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-6{gap:1.5rem}.space-y-3>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.75rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.75rem * var(--tw-space-y-reverse))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1rem * var(--tw-space-y-reverse))}.space-y-6>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(1.5rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(1.5rem * var(--tw-space-y-reverse))}.overflow-hidden{overflow:hidden}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.rounded-xl{border-radius:.75rem}.rounded-t-xl{border-top-left-radius:.75rem;border-top-right-radius:.75rem}.border{border-width:1px}.border-b{border-bottom-width:1px}.border-emerald-500\/20{border-color:#10b98133}.border-emerald-500\/30{border-color:#10b9814d}.border-indigo-500\/30{border-color:#6366f14d}.border-indigo-500\/50{border-color:#6366f180}.border-red-500\/20{border-color:#ef444433}.border-slate-700{--tw-border-opacity: 1;border-color:rgb(51 65 85 / var(--tw-border-opacity, 1))}.border-slate-700\/50{border-color:#33415580}.border-slate-800{--tw-border-opacity: 1;border-color:rgb(30 41 59 / var(--tw-border-opacity, 1))}.bg-amber-500{--tw-bg-opacity: 1;background-color:rgb(245 158 11 / var(--tw-bg-opacity, 1))}.bg-amber-500\/20{background-color:#f59e0b33}.bg-emerald-500{--tw-bg-opacity: 1;background-color:rgb(16 185 129 / var(--tw-bg-opacity, 1))}.bg-emerald-500\/10{background-color:#10b9811a}.bg-emerald-500\/20{background-color:#10b98133}.bg-indigo-500{--tw-bg-opacity: 1;background-color:rgb(99 102 241 / var(--tw-bg-opacity, 1))}.bg-indigo-500\/10{background-color:#6366f11a}.bg-indigo-600{--tw-bg-opacity: 1;background-color:rgb(79 70 229 / var(--tw-bg-opacity, 1))}.bg-indigo-900\/40{background-color:#312e8166}.bg-red-500{--tw-bg-opacity: 1;background-color:rgb(239 68 68 / var(--tw-bg-opacity, 1))}.bg-red-500\/10{background-color:#ef44441a}.bg-red-500\/20{background-color:#ef444433}.bg-slate-700{--tw-bg-opacity: 1;background-color:rgb(51 65 85 / var(--tw-bg-opacity, 1))}.bg-slate-800{--tw-bg-opacity: 1;background-color:rgb(30 41 59 / var(--tw-bg-opacity, 1))}.bg-slate-800\/50{background-color:#1e293b80}.bg-slate-800\/80{background-color:#1e293bcc}.bg-slate-900{--tw-bg-opacity: 1;background-color:rgb(15 23 42 / var(--tw-bg-opacity, 1))}.bg-slate-900\/50{background-color:#0f172a80}.bg-gradient-to-r{background-image:linear-gradient(to right,var(--tw-gradient-stops))}.from-indigo-400{--tw-gradient-from: #818cf8 var(--tw-gradient-from-position);--tw-gradient-to: rgb(129 140 248 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.to-cyan-400{--tw-gradient-to: #22d3ee var(--tw-gradient-to-position)}.bg-clip-text{-webkit-background-clip:text;background-clip:text}.p-2\.5{padding:.625rem}.p-4{padding:1rem}.p-5{padding:1.25rem}.p-6{padding:1.5rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-4{padding-left:1rem;padding-right:1rem}.px-6{padding-left:1.5rem;padding-right:1.5rem}.py-0\.5{padding-top:.125rem;padding-bottom:.125rem}.py-1{padding-top:.25rem;padding-bottom:.25rem}.py-1\.5{padding-top:.375rem;padding-bottom:.375rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-4{padding-top:1rem;padding-bottom:1rem}.text-center{text-align:center}.text-right{text-align:right}.font-mono{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.font-sans{font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.capitalize{text-transform:capitalize}.text-amber-400{--tw-text-opacity: 1;color:rgb(251 191 36 / var(--tw-text-opacity, 1))}.text-emerald-400{--tw-text-opacity: 1;color:rgb(52 211 153 / var(--tw-text-opacity, 1))}.text-indigo-400{--tw-text-opacity: 1;color:rgb(129 140 248 / var(--tw-text-opacity, 1))}.text-red-400{--tw-text-opacity: 1;color:rgb(248 113 113 / var(--tw-text-opacity, 1))}.text-slate-100{--tw-text-opacity: 1;color:rgb(241 245 249 / var(--tw-text-opacity, 1))}.text-slate-200{--tw-text-opacity: 1;color:rgb(226 232 240 / var(--tw-text-opacity, 1))}.text-slate-300{--tw-text-opacity: 1;color:rgb(203 213 225 / var(--tw-text-opacity, 1))}.text-slate-400{--tw-text-opacity: 1;color:rgb(148 163 184 / var(--tw-text-opacity, 1))}.text-slate-500{--tw-text-opacity: 1;color:rgb(100 116 139 / var(--tw-text-opacity, 1))}.text-transparent{color:transparent}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity, 1))}.opacity-40{opacity:.4}.opacity-50{opacity:.5}.shadow-\[0_0_15px_rgba\(99\,102\,241\,0\.15\)\]{--tw-shadow: 0 0 15px rgba(99,102,241,.15);--tw-shadow-colored: 0 0 15px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.shadow-inner{--tw-shadow: inset 0 2px 4px 0 rgb(0 0 0 / .05);--tw-shadow-colored: inset 0 2px 4px 0 var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.shadow-sm{--tw-shadow: 0 1px 2px 0 rgb(0 0 0 / .05);--tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.shadow-xl{--tw-shadow: 0 20px 25px -5px rgb(0 0 0 / .1), 0 8px 10px -6px rgb(0 0 0 / .1);--tw-shadow-colored: 0 20px 25px -5px var(--tw-shadow-color), 0 8px 10px -6px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.outline-none{outline:2px solid transparent;outline-offset:2px}.outline{outline-style:solid}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.backdrop-blur{--tw-backdrop-blur: blur(8px);-webkit-backdrop-filter:var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);backdrop-filter:var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia)}.transition{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.transition-colors{transition-property:color,background-color,border-color,text-decoration-color,fill,stroke;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-300{transition-duration:.3s}.duration-500{transition-duration:.5s}.ease-out{transition-timing-function:cubic-bezier(0,0,.2,1)}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.selection\:bg-indigo-500\/30 *::-moz-selection{background-color:#6366f14d}.selection\:bg-indigo-500\/30 *::selection{background-color:#6366f14d}.selection\:bg-indigo-500\/30::-moz-selection{background-color:#6366f14d}.selection\:bg-indigo-500\/30::selection{background-color:#6366f14d}.hover\:scale-105:hover{--tw-scale-x: 1.05;--tw-scale-y: 1.05;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.hover\:border-slate-500:hover{--tw-border-opacity: 1;border-color:rgb(100 116 139 / var(--tw-border-opacity, 1))}.hover\:border-slate-600:hover{--tw-border-opacity: 1;border-color:rgb(71 85 105 / var(--tw-border-opacity, 1))}.hover\:bg-emerald-500\/20:hover{background-color:#10b98133}.hover\:bg-indigo-500:hover{--tw-bg-opacity: 1;background-color:rgb(99 102 241 / var(--tw-bg-opacity, 1))}.hover\:bg-indigo-500\/20:hover{background-color:#6366f133}.hover\:bg-slate-600:hover{--tw-bg-opacity: 1;background-color:rgb(71 85 105 / var(--tw-bg-opacity, 1))}.focus\:ring-2:focus{--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color);box-shadow:var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow, 0 0 #0000)}.focus\:ring-indigo-500:focus{--tw-ring-opacity: 1;--tw-ring-color: rgb(99 102 241 / var(--tw-ring-opacity, 1))}.active\:scale-95:active{--tw-scale-x: .95;--tw-scale-y: .95;transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skew(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.disabled\:opacity-50:disabled{opacity:.5}.disabled\:hover\:bg-indigo-600:hover:disabled{--tw-bg-opacity: 1;background-color:rgb(79 70 229 / var(--tw-bg-opacity, 1))}.disabled\:hover\:bg-slate-700:hover:disabled{--tw-bg-opacity: 1;background-color:rgb(51 65 85 / var(--tw-bg-opacity, 1))}@media (min-width: 1024px){.lg\:col-span-2{grid-column:span 2 / span 2}.lg\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}}
frontend/dist/index.html ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>CLM Dashboard</title>
7
+ <script type="module" crossorigin src="/assets/index-C3o0olYq.js"></script>
8
+ <link rel="stylesheet" crossorigin href="/assets/index-CV2RR57m.css">
9
+ </head>
10
+ <body class="bg-slate-900 text-slate-100 font-sans">
11
+ <div id="root"></div>
12
+ </body>
13
+ </html>
frontend/src/App.jsx CHANGED
@@ -4,11 +4,11 @@ import Dashboard from './components/Dashboard'
4
  function App() {
5
  return (
6
  <div className="min-h-screen bg-slate-900 text-slate-100 selection:bg-indigo-500/30">
7
- <header className="border-b border-slate-800 bg-slate-900/50 backdrop-blur top-0 sticky z-10 px-6 py-4 flex items-center justify-between">
8
  <h1 className="text-xl font-bold bg-gradient-to-r from-indigo-400 to-cyan-400 bg-clip-text text-transparent">
9
  Cognitive Load Manager
10
  </h1>
11
- <div className="text-sm text-slate-400">OpenEnv Compliant Environment Dashboard</div>
12
  </header>
13
  <main className="p-6 max-w-7xl mx-auto">
14
  <Dashboard />
 
4
  function App() {
5
  return (
6
  <div className="min-h-screen bg-slate-900 text-slate-100 selection:bg-indigo-500/30">
7
+ <header className="border-b border-slate-800 bg-slate-900/50 backdrop-blur top-0 sticky z-10 px-6 py-4 flex items-center justify-center">
8
  <h1 className="text-xl font-bold bg-gradient-to-r from-indigo-400 to-cyan-400 bg-clip-text text-transparent">
9
  Cognitive Load Manager
10
  </h1>
11
+ {/* <div className="text-sm text-slate-400">OpenEnv Compliant Environment Dashboard</div> */}
12
  </header>
13
  <main className="p-6 max-w-7xl mx-auto">
14
  <Dashboard />
frontend/src/components/Dashboard.jsx CHANGED
@@ -1,234 +1,802 @@
1
- import React, { useState, useEffect, useRef } from 'react';
2
- import { RefreshCw, Briefcase, Coffee, Clock } from 'lucide-react';
3
 
4
- const API_BASE = 'http://localhost:8000';
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  export default function Dashboard() {
7
- const [level, setLevel] = useState('medium');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  const [sessionId, setSessionId] = useState(null);
9
- const [obs, setObs] = useState(null);
10
- const [stateData, setStateData] = useState(null);
11
- const [logs, setLogs] = useState([]);
12
  const [loading, setLoading] = useState(false);
 
13
  const [error, setError] = useState(null);
14
- const scrollRef = useRef(null);
15
 
16
- const fetchState = async (sid) => {
17
- try {
18
- const res = await fetch(`${API_BASE}/state?session_id=${sid}`);
19
- if (res.ok) {
20
- const data = await res.json();
21
- setStateData(data);
22
- }
23
- } catch(e) { console.error("State fetch error", e); }
24
- };
25
 
26
- const handleReset = async () => {
27
- setLoading(true);
28
- setError(null);
29
  try {
30
  const res = await fetch(`${API_BASE}/reset`, {
31
- method: 'POST',
32
- headers: { 'Content-Type': 'application/json' },
33
- body: JSON.stringify({ level })
34
  });
 
35
  const data = await res.json();
36
- setSessionId(data.session_id);
37
- setObs(data.observation);
38
- setLogs([{ type: 'system', msg: `Environment reset: ${level} level` }]);
39
- await fetchState(data.session_id);
40
- } catch (err) {
41
- setError(err.message || "Failed to connect to backend");
42
- } finally {
43
- setLoading(false);
44
- }
45
- };
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- const handleAction = async (actionType, taskId = null) => {
48
- if (!sessionId) return;
49
  setLoading(true);
50
-
51
- const action = { type: actionType };
52
- if (taskId) action.task_id = taskId;
53
-
54
  try {
55
  const res = await fetch(`${API_BASE}/step`, {
56
- method: 'POST',
57
- headers: { 'Content-Type': 'application/json' },
58
- body: JSON.stringify({ session_id: sessionId, action })
59
  });
60
  const data = await res.json();
61
- setObs(data.observation);
 
 
62
 
63
- let logMsg = `Action: ${actionType}${taskId ? ' ('+taskId+')' : ''} | Reward: ${data.reward.toFixed(2)}`;
64
- if (data.done) {
65
- logMsg += ` | DONE. Final Score: ${data.info?.final_score?.toFixed(2) || 'N/A'}`;
 
 
 
 
66
  }
67
 
68
- setLogs(prev => [...prev, { type: 'action', msg: logMsg, reward: data.reward }]);
69
- await fetchState(sessionId);
 
 
 
 
70
 
71
- setTimeout(() => {
72
- if(scrollRef.current) scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
73
- }, 50);
 
 
74
 
75
- } catch (err) {
76
- setError(err.message);
77
- } finally {
78
- setLoading(false);
79
- }
80
- };
 
 
81
 
82
  useEffect(() => {
83
  handleReset();
84
- }, [level]);
 
 
 
 
 
 
 
85
 
86
  return (
87
- <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
88
- <div className="lg:col-span-2 space-y-6">
89
- <div className="bg-slate-800 p-4 rounded-xl border border-slate-700 flex items-center gap-4">
90
- <select
91
- value={level}
92
- onChange={e => setLevel(e.target.value)}
93
- className="bg-slate-900 border border-slate-700 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-indigo-500 outline-none"
94
- >
95
- <option value="easy">Easy</option>
96
- <option value="medium">Medium</option>
97
- <option value="hard">Hard</option>
98
- </select>
99
- <button
100
- onClick={handleReset}
101
- disabled={loading}
102
- className="flex items-center gap-2 bg-slate-700 hover:bg-slate-600 transition-colors px-4 py-2 rounded-lg text-sm font-medium"
103
- >
104
- <RefreshCw size={16} className={loading ? "animate-spin" : ""} /> Reset Env
105
- </button>
106
- <div className="ml-auto text-sm text-slate-400">
107
- Time Step: <span className="font-mono text-white bg-slate-900 px-2 py-1 rounded">{obs?.time_step || 0}</span>
108
  </div>
109
- {error && <span className="text-red-400 text-sm ml-4">{error}</span>}
110
  </div>
111
 
112
- <div className="grid grid-cols-2 gap-4">
113
- <div className="bg-slate-800 p-5 rounded-xl border border-slate-700 hover:border-slate-600 transition-colors">
114
- <div className="flex justify-between items-center mb-2">
115
- <span className="text-slate-400 text-sm">Energy</span>
116
- <span className="font-bold">{stateData ? (stateData.energy * 100).toFixed(0) : 0}%</span>
117
- </div>
118
- <div className="w-full bg-slate-900 rounded-full h-3">
119
- <div
120
- className={`h-3 rounded-full transition-all duration-500 ease-out ${stateData?.energy > 0.5 ? 'bg-emerald-500' : stateData?.energy > 0.2 ? 'bg-amber-500' : 'bg-red-500'}`}
121
- style={{ width: `${stateData ? stateData.energy * 100 : 0}%` }}
122
- ></div>
123
- </div>
124
- <div className="mt-3 text-xs text-slate-500 text-right">
125
- Obs: <span className="text-slate-300 capitalize">{obs?.visible_state?.fatigue_level || 'N/A'}</span>
126
- </div>
127
- </div>
128
-
129
- <div className="bg-slate-800 p-5 rounded-xl border border-slate-700 hover:border-slate-600 transition-colors">
130
- <div className="flex justify-between items-center mb-2">
131
- <span className="text-slate-400 text-sm">Stress</span>
132
- <span className="font-bold">{stateData ? (stateData.stress * 100).toFixed(0) : 0}%</span>
133
- </div>
134
- <div className="w-full bg-slate-900 rounded-full h-3">
135
- <div
136
- className={`h-3 rounded-full transition-all duration-500 ease-out ${stateData?.stress > 0.7 ? 'bg-red-500 w-full animate-pulse' : stateData?.stress > 0.4 ? 'bg-amber-500' : 'bg-emerald-500'}`}
137
- style={{ width: `${stateData ? stateData.stress * 100 : 0}%` }}
138
- ></div>
139
- </div>
140
- <div className="mt-3 text-xs text-slate-500 text-right">
141
- Warning: {obs?.visible_state?.stress_warning ? <span className="text-red-400 font-bold">YES</span> : <span className="text-emerald-400">NO</span>}
142
- </div>
143
- </div>
 
 
 
 
 
144
  </div>
 
145
 
146
- <div className="bg-slate-800 p-5 rounded-xl border border-slate-700">
147
- <h3 className="text-slate-400 text-sm mb-4">Environment Actions</h3>
148
- <div className="flex gap-4">
149
- <button disabled={loading} onClick={() => handleAction('break')} className="flex-1 flex flex-col items-center justify-center p-4 rounded-xl bg-indigo-500/10 hover:bg-indigo-500/20 border border-indigo-500/30 text-indigo-400 transition-all hover:scale-105 active:scale-95">
150
- <Coffee size={24} className="mb-2" />
151
- <span className="text-sm font-medium">Take Break</span>
152
- </button>
153
- <button disabled={loading} onClick={() => handleAction('delay')} className="flex-1 flex flex-col items-center justify-center p-4 rounded-xl bg-emerald-500/10 hover:bg-emerald-500/20 border border-emerald-500/30 text-emerald-400 transition-all hover:scale-105 active:scale-95">
154
- <Clock size={24} className="mb-2" />
155
- <span className="text-sm font-medium">Delay / Idle</span>
156
- </button>
157
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  </div>
159
 
160
- <div className="space-y-4">
161
- <h2 className="text-lg font-bold flex items-center gap-2 px-1">
162
- <Briefcase size={20} className="text-indigo-400" /> Active Tasks
163
- </h2>
164
- <div className="space-y-3">
165
- {obs?.tasks?.map(t => {
166
- const isCurrent = stateData?.current_task_id === t.id;
167
- const isDone = t.progress >= 1.0;
168
- const isLate = !isDone && t.deadline && obs.time_step > t.deadline;
169
- const isUrgent = !isDone && t.deadline && (t.deadline - obs.time_step <= 3) && (t.deadline - obs.time_step >= 0);
170
-
171
- return (
172
- <div key={t.id} className={`p-4 rounded-xl border transition-all ${isCurrent && !isDone ? 'bg-indigo-900/40 border-indigo-500/50 shadow-[0_0_15px_rgba(99,102,241,0.15)]' : 'bg-slate-800 border-slate-700 hover:border-slate-500'} ${isDone ? 'opacity-50' : ''}`}>
173
- <div className="flex justify-between items-start mb-3">
174
- <div>
175
- <h4 className="font-semibold flex items-center gap-2">
176
- {t.id}
177
- {isDone && <span className="text-xs bg-emerald-500/20 text-emerald-400 px-2 py-0.5 rounded-full">Done</span>}
178
- {isLate && <span className="text-xs bg-red-500/20 text-red-400 px-2 py-0.5 rounded-full">Late</span>}
179
- {isUrgent && <span className="text-xs bg-amber-500/20 text-amber-400 px-2 py-0.5 rounded-full">Urgent</span>}
180
- </h4>
181
- <div className="text-xs text-slate-400 mt-1 flex gap-3">
182
- <span>Diff: <span className="capitalize text-slate-300">{t.difficulty}</span></span>
183
- {t.deadline && <span>Deadline: <span className="font-mono text-slate-300">{t.deadline}</span></span>}
 
 
 
 
 
184
  </div>
185
  </div>
186
- <div className="flex gap-2">
187
- <button
188
- onClick={() => handleAction('work', t.id)}
189
- disabled={loading || isDone}
190
- className="px-4 py-1.5 bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 disabled:hover:bg-indigo-600 rounded text-sm font-medium transition-colors shadow-sm"
191
- >
192
- Work
193
- </button>
194
- {!isCurrent && (
195
- <button
196
- onClick={() => handleAction('switch', t.id)}
197
- disabled={loading || isDone}
198
- className="px-4 py-1.5 bg-slate-700 hover:bg-slate-600 disabled:opacity-50 disabled:hover:bg-slate-700 rounded text-sm font-medium transition-colors shadow-sm"
199
- >
200
- Switch
201
- </button>
202
- )}
203
  </div>
 
 
 
 
 
 
 
204
  </div>
205
- <div className="w-full bg-slate-900 mb-1 rounded-full h-2 overflow-hidden shadow-inner">
206
- <div
207
- className={`h-2 rounded-full transition-all duration-300 ease-out ${isDone ? 'bg-emerald-500' : 'bg-indigo-500'}`}
208
- style={{ width: `${Math.min(100, t.progress * 100)}%` }}
209
- ></div>
210
- </div>
211
- </div>
212
- );
213
- })}
214
- </div>
215
- </div>
216
- </div>
217
 
218
- <div className="bg-slate-800 rounded-xl border border-slate-700 flex flex-col h-[calc(100vh-6rem)] sticky top-6 shadow-xl">
219
- <div className="p-4 border-b border-slate-700 bg-slate-900/50 rounded-t-xl">
220
- <h3 className="font-bold text-slate-200">Activity Log</h3>
 
221
  </div>
222
- <div className="p-4 overflow-y-auto flex-1 space-y-3 font-mono text-xs" ref={scrollRef}>
223
- {logs.length === 0 && <div className="text-slate-500 text-center mt-10">No activity yet.</div>}
224
- {logs.map((log, i) => (
225
- <div key={i} className={`p-2.5 rounded border ${log.type === 'system' ? 'text-slate-400 border-slate-700/50 bg-slate-800/50' : log.reward > 0 ? 'text-emerald-400 bg-emerald-500/10 border-emerald-500/20' : log.reward < 0 ? 'text-red-400 bg-red-500/10 border-red-500/20' : 'text-slate-300 border-slate-700 bg-slate-800/80'}`}>
226
- <span className="opacity-40 mr-2">[{i.toString().padStart(3, '0')}]</span>
227
- {log.msg}
 
 
 
 
228
  </div>
229
- ))}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  </div>
231
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  </div>
233
  );
234
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // import React, { useState, useEffect, useRef } from 'react';
2
+ // import { RefreshCw, Briefcase, Coffee, Clock } from 'lucide-react';
3
 
4
+ // const API_BASE = 'http://localhost:8000';
5
 
6
+ // export default function Dashboard() {
7
+ // const [level, setLevel] = useState('medium');
8
+ // const [sessionId, setSessionId] = useState(null);
9
+ // const [obs, setObs] = useState(null);
10
+ // const [stateData, setStateData] = useState(null);
11
+ // const [logs, setLogs] = useState([]);
12
+ // const [loading, setLoading] = useState(false);
13
+ // const [error, setError] = useState(null);
14
+ // const scrollRef = useRef(null);
15
+
16
+ // const fetchState = async (sid) => {
17
+ // try {
18
+ // const res = await fetch(`${API_BASE}/state?session_id=${sid}`);
19
+ // if (res.ok) {
20
+ // const data = await res.json();
21
+ // setStateData(data);
22
+ // }
23
+ // } catch(e) { console.error("State fetch error", e); }
24
+ // };
25
+
26
+ // const handleReset = async () => {
27
+ // setLoading(true);
28
+ // setError(null);
29
+ // try {
30
+ // const res = await fetch(`${API_BASE}/reset`, {
31
+ // method: 'POST',
32
+ // headers: { 'Content-Type': 'application/json' },
33
+ // body: JSON.stringify({ level })
34
+ // });
35
+ // const data = await res.json();
36
+ // setSessionId(data.session_id);
37
+ // setObs(data.observation);
38
+ // setLogs([{ type: 'system', msg: `Environment reset: ${level} level` }]);
39
+ // await fetchState(data.session_id);
40
+ // } catch (err) {
41
+ // setError(err.message || "Failed to connect to backend");
42
+ // } finally {
43
+ // setLoading(false);
44
+ // }
45
+ // };
46
+
47
+ // const handleAction = async (actionType, taskId = null) => {
48
+ // if (!sessionId) return;
49
+ // setLoading(true);
50
+
51
+ // const action = { type: actionType };
52
+ // if (taskId) action.task_id = taskId;
53
+
54
+ // try {
55
+ // const res = await fetch(`${API_BASE}/step`, {
56
+ // method: 'POST',
57
+ // headers: { 'Content-Type': 'application/json' },
58
+ // body: JSON.stringify({ session_id: sessionId, action })
59
+ // });
60
+ // const data = await res.json();
61
+ // setObs(data.observation);
62
+
63
+ // let logMsg = `Action: ${actionType}${taskId ? ' ('+taskId+')' : ''} | Reward: ${data.reward.toFixed(2)}`;
64
+ // if (data.done) {
65
+ // logMsg += ` | DONE. Final Score: ${data.info?.final_score?.toFixed(2) || 'N/A'}`;
66
+ // }
67
+
68
+ // setLogs(prev => [...prev, { type: 'action', msg: logMsg, reward: data.reward }]);
69
+ // await fetchState(sessionId);
70
+
71
+ // setTimeout(() => {
72
+ // if(scrollRef.current) scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
73
+ // }, 50);
74
+
75
+ // } catch (err) {
76
+ // setError(err.message);
77
+ // } finally {
78
+ // setLoading(false);
79
+ // }
80
+ // };
81
+
82
+ // useEffect(() => {
83
+ // handleReset();
84
+ // }, [level]);
85
+
86
+ // return (
87
+ // <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
88
+ // <div className="lg:col-span-2 space-y-6">
89
+ // <div className="bg-slate-800 p-4 rounded-xl border border-slate-700 flex items-center gap-4">
90
+ // <select
91
+ // value={level}
92
+ // onChange={e => setLevel(e.target.value)}
93
+ // className="bg-slate-900 border border-slate-700 rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-indigo-500 outline-none"
94
+ // >
95
+ // <option value="easy">Easy</option>
96
+ // <option value="medium">Medium</option>
97
+ // <option value="hard">Hard</option>
98
+ // </select>
99
+ // <button
100
+ // onClick={handleReset}
101
+ // disabled={loading}
102
+ // className="flex items-center gap-2 bg-slate-700 hover:bg-slate-600 transition-colors px-4 py-2 rounded-lg text-sm font-medium"
103
+ // >
104
+ // <RefreshCw size={16} className={loading ? "animate-spin" : ""} /> Reset Env
105
+ // </button>
106
+ // <div className="ml-auto text-sm text-slate-400">
107
+ // Time Step: <span className="font-mono text-white bg-slate-900 px-2 py-1 rounded">{obs?.time_step || 0}</span>
108
+ // </div>
109
+ // {error && <span className="text-red-400 text-sm ml-4">{error}</span>}
110
+ // </div>
111
+
112
+ // <div className="grid grid-cols-2 gap-4">
113
+ // <div className="bg-slate-800 p-5 rounded-xl border border-slate-700 hover:border-slate-600 transition-colors">
114
+ // <div className="flex justify-between items-center mb-2">
115
+ // <span className="text-slate-400 text-sm">Energy</span>
116
+ // <span className="font-bold">{stateData ? (stateData.energy * 100).toFixed(0) : 0}%</span>
117
+ // </div>
118
+ // <div className="w-full bg-slate-900 rounded-full h-3">
119
+ // <div
120
+ // className={`h-3 rounded-full transition-all duration-500 ease-out ${stateData?.energy > 0.5 ? 'bg-emerald-500' : stateData?.energy > 0.2 ? 'bg-amber-500' : 'bg-red-500'}`}
121
+ // style={{ width: `${stateData ? stateData.energy * 100 : 0}%` }}
122
+ // ></div>
123
+ // </div>
124
+ // <div className="mt-3 text-xs text-slate-500 text-right">
125
+ // Obs: <span className="text-slate-300 capitalize">{obs?.visible_state?.fatigue_level || 'N/A'}</span>
126
+ // </div>
127
+ // </div>
128
+
129
+ // <div className="bg-slate-800 p-5 rounded-xl border border-slate-700 hover:border-slate-600 transition-colors">
130
+ // <div className="flex justify-between items-center mb-2">
131
+ // <span className="text-slate-400 text-sm">Stress</span>
132
+ // <span className="font-bold">{stateData ? (stateData.stress * 100).toFixed(0) : 0}%</span>
133
+ // </div>
134
+ // <div className="w-full bg-slate-900 rounded-full h-3">
135
+ // <div
136
+ // className={`h-3 rounded-full transition-all duration-500 ease-out ${stateData?.stress > 0.7 ? 'bg-red-500 w-full animate-pulse' : stateData?.stress > 0.4 ? 'bg-amber-500' : 'bg-emerald-500'}`}
137
+ // style={{ width: `${stateData ? stateData.stress * 100 : 0}%` }}
138
+ // ></div>
139
+ // </div>
140
+ // <div className="mt-3 text-xs text-slate-500 text-right">
141
+ // Warning: {obs?.visible_state?.stress_warning ? <span className="text-red-400 font-bold">YES</span> : <span className="text-emerald-400">NO</span>}
142
+ // </div>
143
+ // </div>
144
+ // </div>
145
+
146
+ // <div className="bg-slate-800 p-5 rounded-xl border border-slate-700">
147
+ // <h3 className="text-slate-400 text-sm mb-4">Environment Actions</h3>
148
+ // <div className="flex gap-4">
149
+ // <button disabled={loading} onClick={() => handleAction('break')} className="flex-1 flex flex-col items-center justify-center p-4 rounded-xl bg-indigo-500/10 hover:bg-indigo-500/20 border border-indigo-500/30 text-indigo-400 transition-all hover:scale-105 active:scale-95">
150
+ // <Coffee size={24} className="mb-2" />
151
+ // <span className="text-sm font-medium">Take Break</span>
152
+ // </button>
153
+ // <button disabled={loading} onClick={() => handleAction('delay')} className="flex-1 flex flex-col items-center justify-center p-4 rounded-xl bg-emerald-500/10 hover:bg-emerald-500/20 border border-emerald-500/30 text-emerald-400 transition-all hover:scale-105 active:scale-95">
154
+ // <Clock size={24} className="mb-2" />
155
+ // <span className="text-sm font-medium">Delay / Idle</span>
156
+ // </button>
157
+ // </div>
158
+ // </div>
159
+
160
+ // <div className="space-y-4">
161
+ // <h2 className="text-lg font-bold flex items-center gap-2 px-1">
162
+ // <Briefcase size={20} className="text-indigo-400" /> Active Tasks
163
+ // </h2>
164
+ // <div className="space-y-3">
165
+ // {obs?.tasks?.map(t => {
166
+ // const isCurrent = stateData?.current_task_id === t.id;
167
+ // const isDone = t.progress >= 1.0;
168
+ // const isLate = !isDone && t.deadline && obs.time_step > t.deadline;
169
+ // const isUrgent = !isDone && t.deadline && (t.deadline - obs.time_step <= 3) && (t.deadline - obs.time_step >= 0);
170
+
171
+ // return (
172
+ // <div key={t.id} className={`p-4 rounded-xl border transition-all ${isCurrent && !isDone ? 'bg-indigo-900/40 border-indigo-500/50 shadow-[0_0_15px_rgba(99,102,241,0.15)]' : 'bg-slate-800 border-slate-700 hover:border-slate-500'} ${isDone ? 'opacity-50' : ''}`}>
173
+ // <div className="flex justify-between items-start mb-3">
174
+ // <div>
175
+ // <h4 className="font-semibold flex items-center gap-2">
176
+ // {t.id}
177
+ // {isDone && <span className="text-xs bg-emerald-500/20 text-emerald-400 px-2 py-0.5 rounded-full">Done</span>}
178
+ // {isLate && <span className="text-xs bg-red-500/20 text-red-400 px-2 py-0.5 rounded-full">Late</span>}
179
+ // {isUrgent && <span className="text-xs bg-amber-500/20 text-amber-400 px-2 py-0.5 rounded-full">Urgent</span>}
180
+ // </h4>
181
+ // <div className="text-xs text-slate-400 mt-1 flex gap-3">
182
+ // <span>Diff: <span className="capitalize text-slate-300">{t.difficulty}</span></span>
183
+ // {t.deadline && <span>Deadline: <span className="font-mono text-slate-300">{t.deadline}</span></span>}
184
+ // </div>
185
+ // </div>
186
+ // <div className="flex gap-2">
187
+ // <button
188
+ // onClick={() => handleAction('work', t.id)}
189
+ // disabled={loading || isDone}
190
+ // className="px-4 py-1.5 bg-indigo-600 hover:bg-indigo-500 disabled:opacity-50 disabled:hover:bg-indigo-600 rounded text-sm font-medium transition-colors shadow-sm"
191
+ // >
192
+ // Work
193
+ // </button>
194
+ // {!isCurrent && (
195
+ // <button
196
+ // onClick={() => handleAction('switch', t.id)}
197
+ // disabled={loading || isDone}
198
+ // className="px-4 py-1.5 bg-slate-700 hover:bg-slate-600 disabled:opacity-50 disabled:hover:bg-slate-700 rounded text-sm font-medium transition-colors shadow-sm"
199
+ // >
200
+ // Switch
201
+ // </button>
202
+ // )}
203
+ // </div>
204
+ // </div>
205
+ // <div className="w-full bg-slate-900 mb-1 rounded-full h-2 overflow-hidden shadow-inner">
206
+ // <div
207
+ // className={`h-2 rounded-full transition-all duration-300 ease-out ${isDone ? 'bg-emerald-500' : 'bg-indigo-500'}`}
208
+ // style={{ width: `${Math.min(100, t.progress * 100)}%` }}
209
+ // ></div>
210
+ // </div>
211
+ // </div>
212
+ // );
213
+ // })}
214
+ // </div>
215
+ // </div>
216
+ // </div>
217
+
218
+ // <div className="bg-slate-800 rounded-xl border border-slate-700 flex flex-col h-[calc(100vh-6rem)] sticky top-6 shadow-xl">
219
+ // <div className="p-4 border-b border-slate-700 bg-slate-900/50 rounded-t-xl">
220
+ // <h3 className="font-bold text-slate-200">Activity Log</h3>
221
+ // </div>
222
+ // <div className="p-4 overflow-y-auto flex-1 space-y-3 font-mono text-xs" ref={scrollRef}>
223
+ // {logs.length === 0 && <div className="text-slate-500 text-center mt-10">No activity yet.</div>}
224
+ // {logs.map((log, i) => (
225
+ // <div key={i} className={`p-2.5 rounded border ${log.type === 'system' ? 'text-slate-400 border-slate-700/50 bg-slate-800/50' : log.reward > 0 ? 'text-emerald-400 bg-emerald-500/10 border-emerald-500/20' : log.reward < 0 ? 'text-red-400 bg-red-500/10 border-red-500/20' : 'text-slate-300 border-slate-700 bg-slate-800/80'}`}>
226
+ // <span className="opacity-40 mr-2">[{i.toString().padStart(3, '0')}]</span>
227
+ // {log.msg}
228
+ // </div>
229
+ // ))}
230
+ // </div>
231
+ // </div>
232
+ // </div>
233
+ // );
234
+ // }
235
+
236
+
237
+ import React, { useState, useEffect, useRef, useCallback } from 'react';
238
+
239
+ const API_BASE = 'http://localhost:7860';
240
+
241
+ /* ── helpers ─────────────────────────────────────────────── */
242
+ const fmt2 = n => (+(n ?? 0)).toFixed(2);
243
+ const clamp = (v, lo, hi) => Math.min(hi, Math.max(lo, v));
244
+
245
+ /* ── seed data (shown before backend connects) ───────────── */
246
+ /* ── empty starting constants ───────────── */
247
+ const SEED_TASKS = [];
248
+ const SEED_TRAINED = [0.30, 0.31, 0.35, 0.39, 0.45, 0.51, 0.60, 0.66, 0.73, 0.78, 0.82, 0.85, 0.86, 0.87, 0.88];
249
+ const SEED_EPISODE = 15;
250
+ const AGENT_MSGS = [
251
+ { from: 'manager', text: 'Simulating multi-agent layer. Manager checks stress levels and issues system prompts dynamically to keep the LLM worker aligned.' },
252
+ { from: 'env', text: 'This demo environment is connected to the fully functional FastAPI backend. You can manually execute steps.' }
253
+ ];
254
+ const DRIFT_EVENTS = [];
255
+ const ACTION_LOG = [];
256
+
257
+ /* ── priority badge colours ──────────────────────────────── */
258
+ const PRIORITY_STYLE = {
259
+ critical: { bg: '#fef2f2', color: '#dc2626', border: '#fecaca' },
260
+ high: { bg: '#fff7ed', color: '#c2410c', border: '#fed7aa' },
261
+ blocked: { bg: '#f1f5f9', color: '#64748b', border: '#cbd5e1' },
262
+ normal: { bg: '#f0fdf4', color: '#15803d', border: '#bbf7d0' },
263
+ medium: { bg: '#fff7ed', color: '#b45309', border: '#fde68a' },
264
+ };
265
+
266
+ const PROGRESS_COLOR = {
267
+ critical: '#dc2626', high: '#f97316', blocked: '#94a3b8', normal: '#22c55e', medium: '#f59e0b',
268
+ };
269
+
270
+ /* ── reward curve SVG ────────────────────────────────────── */
271
+ function RewardCurve({ trained = SEED_TRAINED, episode = SEED_EPISODE }) {
272
+ const W = 560, H = 160, pL = 36, pB = 28, pR = 16, pT = 12;
273
+ const cW = W - pL - pR, cH = H - pT - pB;
274
+ const BASELINE = 0.30;
275
+ const yS = v => pT + cH - clamp((v / 1.0) * cH, 0, cH);
276
+ const xS = (i, len) => pL + (i / Math.max(len - 1, 1)) * cW;
277
+ const pts = trained.map((v, i) => `${xS(i, trained.length)},${yS(v)}`).join(' ');
278
+ const ticks = [0, 0.2, 0.4, 0.6, 0.8, 1.0];
279
+ const epLabels = ['ep 1', `ep ${Math.round(episode / 2)}`, `ep ${episode}`];
280
+
281
+ return (
282
+ <div>
283
+ <svg width="100%" viewBox={`0 0 ${W} ${H}`} style={{ display: 'block' }}>
284
+ {/* grid lines */}
285
+ {ticks.map(v => (
286
+ <g key={v}>
287
+ <line x1={pL} y1={yS(v)} x2={W - pR} y2={yS(v)} stroke="#e2e8f0" strokeWidth={1} />
288
+ <text x={pL - 4} y={yS(v) + 3.5} fill="#94a3b8" fontSize={9} textAnchor="end">{v.toFixed(1)}</text>
289
+ </g>
290
+ ))}
291
+ {/* baseline dashed */}
292
+ <line x1={pL} y1={yS(BASELINE)} x2={W - pR} y2={yS(BASELINE)}
293
+ stroke="#f87171" strokeWidth={1.5} strokeDasharray="5 4" />
294
+ {/* baseline end label */}
295
+ <circle cx={W - pR} cy={yS(BASELINE)} r={4} fill="#f87171" />
296
+
297
+ {/* trained area */}
298
+ {trained.length > 1 && <>
299
+ <defs>
300
+ <linearGradient id="tGrad" x1="0" y1="0" x2="0" y2="1">
301
+ <stop offset="0%" stopColor="#22c55e" stopOpacity="0.18" />
302
+ <stop offset="100%" stopColor="#22c55e" stopOpacity="0.02" />
303
+ </linearGradient>
304
+ </defs>
305
+ <polygon
306
+ points={`${pL},${yS(0)} ${pts} ${xS(trained.length - 1, trained.length)},${yS(0)}`}
307
+ fill="url(#tGrad)" />
308
+ <polyline points={pts} fill="none" stroke="#22c55e" strokeWidth={2.5}
309
+ strokeLinecap="round" strokeLinejoin="round" />
310
+ <circle cx={xS(trained.length - 1, trained.length)} cy={yS(trained[trained.length - 1])} r={5}
311
+ fill="#22c55e" stroke="#fff" strokeWidth={2} />
312
+ </>}
313
+
314
+ {/* x axis labels */}
315
+ {epLabels.map((label, i) => {
316
+ const x = pL + (i / 2) * cW;
317
+ return <text key={i} x={x} y={H - 4} fill="#94a3b8" fontSize={9} textAnchor="middle">{label}</text>;
318
+ })}
319
+ </svg>
320
+ {/* legend */}
321
+ <div style={{ display: 'flex', gap: 20, marginTop: 4, paddingLeft: pL }}>
322
+ <div style={{ display: 'flex', alignItems: 'center', gap: 6, fontSize: 11, color: '#64748b' }}>
323
+ <svg width={24} height={8}><line x1={0} y1={4} x2={24} y2={4} stroke="#f87171" strokeWidth={1.5} strokeDasharray="4 3" /></svg>
324
+ Baseline (untrained)
325
+ </div>
326
+ <div style={{ display: 'flex', alignItems: 'center', gap: 6, fontSize: 11, color: '#64748b' }}>
327
+ <svg width={24} height={8}><line x1={0} y1={4} x2={24} y2={4} stroke="#22c55e" strokeWidth={2.5} /></svg>
328
+ GRPO trained agent
329
+ </div>
330
+ </div>
331
+ </div>
332
+ );
333
+ }
334
+
335
+ /* ── main dashboard ──────────────────────────────────────── */
336
  export default function Dashboard() {
337
+ const [level, setLevel] = useState('hard');
338
+ const [targetWorker, setTargetWorker] = useState('w1');
339
+ const [episode, setEpisode] = useState(SEED_EPISODE);
340
+ const [step, setStep] = useState(0);
341
+ const [maxStep, setMaxStep] = useState(50);
342
+ const [workers, setWorkers] = useState([
343
+ { id: 'w1', energy: 1.0, stress: 0.0, expertise: 'analytical' },
344
+ { id: 'w2', energy: 1.0, stress: 0.0, expertise: 'social' },
345
+ { id: 'w3', energy: 1.0, stress: 0.0, expertise: 'analytical' }
346
+ ]);
347
+ const [epReward, setEpReward] = useState(0.0);
348
+ const [tasks, setTasks] = useState(SEED_TASKS);
349
+ const [trained, setTrained] = useState(SEED_TRAINED);
350
+ const [agentMsgs, setAgentMsgs] = useState(AGENT_MSGS);
351
+ const [actionLog, setActionLog] = useState(ACTION_LOG);
352
+ const [schemaDrifts, setSchemaDrifts] = useState(DRIFT_EVENTS);
353
  const [sessionId, setSessionId] = useState(null);
 
 
 
354
  const [loading, setLoading] = useState(false);
355
+ const [liveMode, setLiveMode] = useState(false);
356
  const [error, setError] = useState(null);
357
+ const logRef = useRef(null);
358
 
359
+ const doneTasks = tasks.filter(t => t.progress >= 1).length;
360
+ const blockedCount = tasks.filter(t => t.priority === 'blocked').length;
361
+ const overdueCount = tasks.filter(t => t.priority === 'critical' && t.progress < 1).length;
 
 
 
 
 
 
362
 
363
+ /* ── backend integration ── */
364
+ const handleReset = useCallback(async () => {
365
+ setLoading(true); setError(null);
366
  try {
367
  const res = await fetch(`${API_BASE}/reset`, {
368
+ method: 'POST', headers: { 'Content-Type': 'application/json' },
369
+ body: JSON.stringify({ task_id: level }),
 
370
  });
371
+ if (!res.ok) throw new Error('Server error');
372
  const data = await res.json();
373
+ const obs = data.observation || data;
374
+ setSessionId('active');
375
+ setStep(obs.time_step ?? 0);
376
+ setMaxStep(level === 'expert' ? 60 : 50);
377
+ if (obs.workers) setWorkers(obs.workers);
378
+ setEpReward(0.0);
379
+ setEpisode(e => e + 1);
380
+ setLiveMode(true);
381
+ setAgentMsgs([{ from: 'env', text: `Episode reset · ${level} difficulty · Oracle Manager managing 3 FTEs` }]);
382
+ setSchemaDrifts([]);
383
+ setActionLog([]);
384
+ if (obs.tasks) {
385
+ setTasks(obs.tasks.map(t => ({
386
+ id: t.id, name: t.task_type || t.id, deadline: t.deadline ? `step ${t.deadline}` : 'None',
387
+ deps: t.depends_on ? `deps on ${t.depends_on}` : 'no deps', priority: t.priority || 'normal', progress: t.progress || 0, icon: '📋'
388
+ })));
389
+ }
390
+ } catch (e) {
391
+ setError('Backend offline');
392
+ setLiveMode(false);
393
+ } finally { setLoading(false); }
394
+ }, [level]);
395
 
396
+ const doAction = useCallback(async (type, taskId = null) => {
397
+ if (!sessionId) { setError('Reset first'); return; }
398
  setLoading(true);
399
+ const action = { type, worker_id: targetWorker, ...(taskId ? { task_id: taskId } : {}) };
 
 
 
400
  try {
401
  const res = await fetch(`${API_BASE}/step`, {
402
+ method: 'POST', headers: { 'Content-Type': 'application/json' },
403
+ body: JSON.stringify({ action }),
 
404
  });
405
  const data = await res.json();
406
+ const r = data.reward ?? 0;
407
+ const obs = data.observation || data;
408
+ const newStep = obs.time_step ?? step + 1;
409
 
410
+ setStep(newStep);
411
+ setEpReward(prev => +(prev + r).toFixed(3));
412
+
413
+ if (obs.workers) setWorkers(obs.workers);
414
+
415
+ if (obs.schema_drift) {
416
+ setSchemaDrifts(prev => [...prev, obs.schema_drift]);
417
  }
418
 
419
+ if (obs.tasks) {
420
+ setTasks(obs.tasks.map(t => ({
421
+ id: t.id, name: t.task_type || t.id, deadline: t.deadline ? `step ${t.deadline}` : 'None',
422
+ deps: t.depends_on ? `deps on ${t.depends_on}` : 'no deps', priority: t.priority || 'normal', progress: t.progress || 0, icon: '📋'
423
+ })));
424
+ }
425
 
426
+ const logEntry = {
427
+ step: `s${newStep}`, action: type, detail: taskId ?? '—',
428
+ reward: (r >= 0 ? '+' : '') + fmt2(r), pos: r >= 0
429
+ };
430
+ setActionLog(prev => [logEntry, ...prev].slice(0, 30));
431
 
432
+ if (data.done) {
433
+ const fs = obs.final_score ?? 0;
434
+ setTrained(prev => [...prev, fs]);
435
+ setAgentMsgs(prev => [...prev, { from: 'env', text: `Episode done · final score ${fmt2(fs)}` }]);
436
+ }
437
+ } catch (e) { setError(e.message); }
438
+ finally { setLoading(false); }
439
+ }, [sessionId, step, workers, targetWorker]);
440
 
441
  useEffect(() => {
442
  handleReset();
443
+ }, [handleReset]);
444
+
445
+ /* ── level badge colour ── */
446
+ const LEVEL_STYLE = {
447
+ easy: { bg: '#dcfce7', c: '#15803d' }, medium: { bg: '#fef3c7', c: '#b45309' },
448
+ hard: { bg: '#fee2e2', c: '#dc2626' }, expert: { bg: '#f3e8ff', c: '#7c3aed' }
449
+ };
450
+ const lvl = LEVEL_STYLE[level] || LEVEL_STYLE.hard;
451
 
452
  return (
453
+ <div style={{
454
+ fontFamily: "'DM Sans', 'Helvetica Neue', Arial, sans-serif",
455
+ background: '#f8fafc', minHeight: '100vh', padding: '0 0 32px 0',
456
+ color: '#1e293b',
457
+ }}>
458
+ <link href="https://fonts.googleapis.com/css2?family=DM+Sans:wght@300;400;500;600;700&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet" />
459
+
460
+ {/* ── TOP NAV ── */}
461
+ <div style={{
462
+ background: '#fff', borderBottom: '1px solid #e2e8f0',
463
+ padding: '0 24px', height: 48, display: 'flex', alignItems: 'center', gap: 16,
464
+ position: 'sticky', top: 0, zIndex: 10,
465
+ }}>
466
+ <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
467
+ <div style={{ width: 20, height: 20, borderRadius: 6, background: 'linear-gradient(135deg,#6366f1,#8b5cf6)', display: 'flex', alignItems: 'center', justifyContent: 'center' }}>
468
+ <span style={{ fontSize: 11 }}>🧠</span>
 
 
 
 
 
469
  </div>
470
+ <span style={{ fontWeight: 700, fontSize: 15, color: '#0f172a', letterSpacing: '-0.02em' }}>StressTest</span>
471
  </div>
472
 
473
+ <Pill color="#22c55e" label="Live" />
474
+ <Pill color="#6366f1" label="Training" />
475
+ <Pill color="#f59e0b" label={`Episode ${episode}`} />
476
+
477
+ {error && <span style={{ fontSize: 11, color: '#ef4444', marginLeft: 4 }}>{error}</span>}
478
+
479
+ <div style={{ marginLeft: 'auto', display: 'flex', alignItems: 'center', gap: 12 }}>
480
+ <select value={targetWorker} onChange={e => setTargetWorker(e.target.value)}
481
+ style={{
482
+ fontSize: 12, border: '1px solid #e2e8f0', borderRadius: 6, padding: '4px 10px',
483
+ background: '#f8fafc', color: '#1e293b', outline: 'none', cursor: 'pointer', fontWeight: 600
484
+ }}>
485
+ <option value="w1">🎯 Assign to Employee 1</option>
486
+ <option value="w2">🎯 Assign to Employee 2</option>
487
+ <option value="w3">🎯 Assign to Employee 3</option>
488
+ </select>
489
+ <select value={level} onChange={e => { setLevel(e.target.value) }}
490
+ style={{
491
+ fontSize: 12, border: '1px solid #e2e8f0', borderRadius: 6, padding: '4px 10px',
492
+ background: '#fff', color: '#1e293b', outline: 'none', cursor: 'pointer'
493
+ }}>
494
+ {['easy', 'medium', 'hard', 'expert'].map(l => <option key={l}>{l}</option>)}
495
+ </select>
496
+ <button onClick={handleReset} disabled={loading} style={{
497
+ fontSize: 12, border: '1px solid #e2e8f0', borderRadius: 6, padding: '4px 12px',
498
+ background: loading ? '#f1f5f9' : '#fff', color: '#64748b', cursor: 'pointer',
499
+ display: 'flex', alignItems: 'center', gap: 5,
500
+ }}>
501
+ <span style={{ display: 'inline-block', animation: loading ? 'spin 1s linear infinite' : 'none' }}>↻</span> Reset
502
+ </button>
503
+ <span style={{ fontSize: 12, color: '#64748b' }}>
504
+ Step <b style={{ color: '#0f172a', fontFamily: 'DM Mono,monospace' }}>{step} / {maxStep}</b>
505
+ </span>
506
+ <div style={{
507
+ background: lvl.bg, color: lvl.c, fontSize: 11, fontWeight: 700,
508
+ padding: '3px 10px', borderRadius: 6, letterSpacing: '0.04em', textTransform: 'capitalize',
509
+ }}>{level}</div>
510
  </div>
511
+ </div>
512
 
513
+ <div style={{ maxWidth: 1200, margin: '0 auto', padding: '20px 24px', display: 'flex', flexDirection: 'column', gap: 16 }}>
514
+
515
+ {/* ── ROW 1: 3 FTEs + overall stats ── */}
516
+ <div style={{ display: 'grid', gridTemplateColumns: 'repeat(5, 1fr)', gap: 14 }}>
517
+ {(workers || []).map(w => {
518
+ const wid = w?.id || 'w?';
519
+ const wexp = w?.expertise || 'none';
520
+ const weng = w?.energy ?? 0;
521
+ const wstress = w?.stress ?? 0;
522
+ return (
523
+ <StatCard key={wid}
524
+ label={`Employee ${wid.replace('w','')} (${wexp.charAt(0).toUpperCase() + wexp.slice(1)})`}
525
+ value={`Energy: ${(weng * 100).toFixed(0)}%`}
526
+ sub={wstress > 0.65 ? 'Elevated Stress Level' : (weng < 0.35 ? 'High Fatigue' : `Stress: ${(wstress * 100).toFixed(0)}%`)}
527
+ bar={weng} barColor={weng > 0.5 ? '#22c55e' : weng > 0.25 ? '#f59e0b' : '#ef4444'}
528
+ />
529
+ );
530
+ })}
531
+ <StatCard
532
+ label="Episode reward"
533
+ value={(epReward >= 0 ? '+' : '') + epReward.toFixed(2)}
534
+ valueColor={epReward >= 0 ? '#22c55e' : '#ef4444'}
535
+ sub={`vs baseline 0.30`}
536
+ />
537
+ <StatCard
538
+ label="Tasks done"
539
+ value={`${doneTasks} / ${tasks.length}`}
540
+ sub={`${blockedCount} blocked, ${overdueCount} overdue`}
541
+ bar={doneTasks / Math.max(tasks.length, 1)} barColor="#6366f1"
542
+ />
543
  </div>
544
 
545
+ {/* ── ROW 2: task queue + reward curve ── */}
546
+ <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 14 }}>
547
+
548
+ {/* task queue */}
549
+ <Card label="TASK QUEUE">
550
+ <div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
551
+ {tasks.map(t => {
552
+ const ps = PRIORITY_STYLE[t.priority] || PRIORITY_STYLE.normal;
553
+ const pc = PROGRESS_COLOR[t.priority] || '#6366f1';
554
+ return (
555
+ <div key={t.id} style={{
556
+ display: 'flex', alignItems: 'center', gap: 10,
557
+ padding: '9px 4px', borderBottom: '1px solid #f1f5f9',
558
+ }}>
559
+ {/* icon */}
560
+ <div style={{
561
+ width: 30, height: 30, borderRadius: 8, background: '#f8fafc', border: '1px solid #e2e8f0',
562
+ display: 'flex', alignItems: 'center', justifyContent: 'center', fontSize: 14, flexShrink: 0
563
+ }}>
564
+ {t.icon}
565
+ </div>
566
+ {/* name + sub */}
567
+ <div style={{ flex: 1, minWidth: 0 }}>
568
+ <div style={{ fontSize: 12, fontWeight: 600, color: '#0f172a', whiteSpace: 'nowrap', overflow: 'hidden', textOverflow: 'ellipsis' }}>
569
+ {t.name}
570
+ </div>
571
+ <div style={{ fontSize: 10, color: '#94a3b8', marginTop: 1 }}>
572
+ {t.deadline && <span>{t.deadline} · </span>}
573
+ {t.deps || ''}
574
  </div>
575
  </div>
576
+ {/* priority badge */}
577
+ <div style={{
578
+ background: ps.bg, color: ps.color, border: `1px solid ${ps.border}`,
579
+ fontSize: 10, fontWeight: 600, padding: '2px 8px', borderRadius: 5,
580
+ flexShrink: 0, textTransform: 'capitalize',
581
+ }}>{t.priority}</div>
582
+ {/* progress bar + pct */}
583
+ <div style={{ width: 80, flexShrink: 0 }}>
584
+ <div style={{ height: 4, background: '#e2e8f0', borderRadius: 99, overflow: 'hidden', marginBottom: 3 }}>
585
+ <div style={{
586
+ width: `${clamp(t.progress * 100, 0, 100)}%`, height: '100%',
587
+ background: pc, borderRadius: 99, transition: 'width 0.4s ease'
588
+ }} />
589
+ </div>
590
+ <div style={{ fontSize: 10, color: '#94a3b8', textAlign: 'right' }}>
591
+ {(t.progress * 100).toFixed(0)}%
592
+ </div>
593
  </div>
594
+ {/* action buttons */}
595
+ {t.priority !== 'blocked' && t.progress < 1 && (
596
+ <div style={{ display: 'flex', gap: 4, flexShrink: 0 }}>
597
+ <TinyBtn label="Work" onClick={() => doAction('work', t.id)} disabled={loading} color="#6366f1" />
598
+ <TinyBtn label="Focus" onClick={() => doAction('focus', t.id)} disabled={loading} color="#8b5cf6" />
599
+ </div>
600
+ )}
601
  </div>
602
+ );
603
+ })}
604
+ </div>
605
+ {/* global actions */}
606
+ <div style={{ display: 'flex', gap: 8, marginTop: 12, paddingTop: 12, borderTop: '1px solid #f1f5f9' }}>
607
+ <TinyBtn label="☕ Break" onClick={() => doAction('break')} disabled={loading} color="#0891b2" wide />
608
+ <TinyBtn label="⏸ Idle" onClick={() => doAction('delay')} disabled={loading} color="#64748b" wide />
609
+ </div>
610
+ </Card>
 
 
 
611
 
612
+ {/* reward curve */}
613
+ <Card label="REWARD CURVE TRAINED VS BASELINE">
614
+ <RewardCurve trained={trained} episode={episode} />
615
+ </Card>
616
  </div>
617
+
618
+ {/* ── ROW 3: multi-agent + schema drift + action log ── */}
619
+ <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 14 }}>
620
+
621
+ {/* multi-agent comms */}
622
+ <Card label="MULTI-AGENT COMMUNICATION">
623
+ {/* agent pills */}
624
+ <div style={{ display: 'flex', gap: 8, marginBottom: 12 }}>
625
+ <AgentPill color="#6366f1" label="Manager agent" />
626
+ <AgentPill color="#22c55e" label="Worker agent" />
627
  </div>
628
+ <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
629
+ {agentMsgs.map((m, i) => {
630
+ const isManager = m.from === 'manager';
631
+ const isEnv = m.from === 'env';
632
+ return (
633
+ <div key={i} style={{
634
+ background: isManager ? '#eff6ff' : isEnv ? '#f8fafc' : '#f0fdf4',
635
+ border: `1px solid ${isManager ? '#bfdbfe' : isEnv ? '#e2e8f0' : '#bbf7d0'}`,
636
+ borderRadius: 8, padding: '8px 12px',
637
+ }}>
638
+ <div style={{
639
+ fontSize: 9, fontWeight: 700, color: isManager ? '#6366f1' : isEnv ? '#94a3b8' : '#22c55e',
640
+ marginBottom: 4, textTransform: 'uppercase', letterSpacing: '0.06em'
641
+ }}>
642
+ {isManager ? 'Manager → Worker' : isEnv ? 'Env → Both' : 'Worker → Env'}
643
+ </div>
644
+ <div style={{ fontSize: 11, color: '#334155', lineHeight: 1.5 }}>{m.text}</div>
645
+ </div>
646
+ );
647
+ })}
648
+ </div>
649
+ </Card>
650
+
651
+ {/* schema drift + action log stacked */}
652
+ <div style={{ display: 'flex', flexDirection: 'column', gap: 14 }}>
653
+ <Card label="SCHEMA DRIFT EVENTS">
654
+ <div style={{ display: 'flex', flexDirection: 'column', gap: 10 }}>
655
+ {schemaDrifts.map((e, i) => (
656
+ <div key={i} style={{ display: 'flex', gap: 10, alignItems: 'flex-start' }}>
657
+ <div style={{
658
+ width: 10, height: 10, borderRadius: '50%', flexShrink: 0, marginTop: 3,
659
+ background: e.dot === 'green' ? '#22c55e' : e.dot === 'orange' ? '#f59e0b' : '#cbd5e1',
660
+ }} />
661
+ <div>
662
+ <div style={{ fontSize: 12, fontWeight: 600, color: '#0f172a' }}>{e.title}</div>
663
+ <div style={{ fontSize: 10, color: '#94a3b8', marginTop: 1 }}>triggered at step {e.step}</div>
664
+ </div>
665
+ </div>
666
+ ))}
667
+ {schemaDrifts.length === 0 && (
668
+ <div style={{ fontSize: 11, color: '#cbd5e1', textAlign: 'center', padding: '10px 0' }}>No drift events yet</div>
669
+ )}
670
+ </div>
671
+ </Card>
672
+
673
+ <Card label="STEP ACTION LOG" style={{ flex: 1 }}>
674
+ <div ref={logRef} style={{ maxHeight: 200, overflowY: 'auto' }}>
675
+ <table style={{ width: '100%', borderCollapse: 'collapse' }}>
676
+ <tbody>
677
+ {actionLog.map((row, i) => (
678
+ <tr key={i} style={{ borderBottom: '1px solid #f1f5f9' }}>
679
+ <td style={{ padding: '5px 6px', fontSize: 10, fontFamily: 'DM Mono,monospace', color: '#94a3b8', width: 28 }}>{row.step}</td>
680
+ <td style={{
681
+ padding: '5px 6px', fontSize: 10, fontWeight: 600,
682
+ color: row.action === 'focus' ? '#6366f1' : row.action === 'work' ? '#0891b2' :
683
+ row.action === 'break' ? '#22c55e' : row.action === 'switch' ? '#f59e0b' : '#94a3b8',
684
+ width: 44
685
+ }}>{row.action}</td>
686
+ <td style={{ padding: '5px 6px', fontSize: 10, color: '#64748b', flex: 1 }}>{row.detail}</td>
687
+ <td style={{
688
+ padding: '5px 6px', fontSize: 10, fontFamily: 'DM Mono,monospace', fontWeight: 600,
689
+ color: row.pos ? '#22c55e' : '#ef4444', textAlign: 'right', width: 44
690
+ }}>{row.reward}</td>
691
+ </tr>
692
+ ))}
693
+ {actionLog.length === 0 && (
694
+ <tr><td colSpan={4} style={{ padding: '16px 0', textAlign: 'center', fontSize: 11, color: '#cbd5e1' }}>No actions yet</td></tr>
695
+ )}
696
+ </tbody>
697
+ </table>
698
+ </div>
699
+ </Card>
700
+ </div>
701
  </div>
702
  </div>
703
+
704
+ <style>{`
705
+ @keyframes spin { from{transform:rotate(0deg)} to{transform:rotate(360deg)} }
706
+ * { box-sizing: border-box; }
707
+ ::-webkit-scrollbar { width:4px; height:4px; }
708
+ ::-webkit-scrollbar-track { background:#f1f5f9; }
709
+ ::-webkit-scrollbar-thumb { background:#cbd5e1; border-radius:99px; }
710
+ `}</style>
711
+ </div>
712
+ );
713
+ }
714
+
715
+ /* ── small atoms ─────────────────────────────────────────── */
716
+ function Pill({ color, label }) {
717
+ return (
718
+ <div style={{
719
+ display: 'flex', alignItems: 'center', gap: 5,
720
+ fontSize: 12, color, fontWeight: 500,
721
+ }}>
722
+ <div style={{ width: 7, height: 7, borderRadius: '50%', background: color }} />
723
+ {label}
724
+ </div>
725
+ );
726
+ }
727
+
728
+ function AgentPill({ color, label }) {
729
+ return (
730
+ <div style={{
731
+ display: 'flex', alignItems: 'center', gap: 6,
732
+ border: '1px solid #e2e8f0', borderRadius: 99,
733
+ padding: '4px 10px', fontSize: 11, color: '#334155',
734
+ }}>
735
+ <div style={{ width: 8, height: 8, borderRadius: '50%', background: color }} />
736
+ {label}
737
  </div>
738
  );
739
  }
740
+
741
+ function TinyBtn({ label, onClick, disabled, color, wide }) {
742
+ return (
743
+ <button onClick={onClick} disabled={disabled} style={{
744
+ fontSize: 11, fontWeight: 600,
745
+ padding: wide ? '5px 14px' : '4px 9px',
746
+ background: `${color}10`,
747
+ border: `1px solid ${color}30`,
748
+ borderRadius: 6, color,
749
+ cursor: disabled ? 'not-allowed' : 'pointer',
750
+ opacity: disabled ? 0.5 : 1,
751
+ transition: 'all 0.15s',
752
+ whiteSpace: 'nowrap',
753
+ }}>{label}</button>
754
+ );
755
+ }
756
+
757
+ function Card({ label, children, style = {} }) {
758
+ return (
759
+ <div style={{
760
+ background: '#fff',
761
+ border: '1px solid #e8ecf0',
762
+ borderRadius: 12,
763
+ padding: '16px 18px',
764
+ ...style,
765
+ }}>
766
+ <div style={{
767
+ fontSize: 10, fontWeight: 700, color: '#94a3b8',
768
+ letterSpacing: '0.1em', textTransform: 'uppercase',
769
+ marginBottom: 14,
770
+ }}>{label}</div>
771
+ {children}
772
+ </div>
773
+ );
774
+ }
775
+
776
+ function StatCard({ label, value, sub, bar, barColor, valueColor }) {
777
+ return (
778
+ <div style={{
779
+ background: '#fff', border: '1px solid #e8ecf0', borderRadius: 12, padding: '16px 18px',
780
+ }}>
781
+ <div style={{
782
+ fontSize: 10, fontWeight: 700, color: '#94a3b8', letterSpacing: '0.1em',
783
+ textTransform: 'uppercase', marginBottom: 8
784
+ }}>{label}</div>
785
+ <div style={{
786
+ fontSize: 28, fontWeight: 700, color: valueColor || '#0f172a',
787
+ letterSpacing: '-0.03em', lineHeight: 1, marginBottom: 8, fontFamily: 'DM Mono,monospace'
788
+ }}>
789
+ {value}
790
+ </div>
791
+ {bar !== undefined && (
792
+ <div style={{ height: 4, background: '#f1f5f9', borderRadius: 99, overflow: 'hidden', marginBottom: 6 }}>
793
+ <div style={{
794
+ width: `${clamp(bar * 100, 0, 100)}%`, height: '100%',
795
+ background: barColor, borderRadius: 99, transition: 'width 0.5s ease'
796
+ }} />
797
+ </div>
798
+ )}
799
+ <div style={{ fontSize: 11, color: '#94a3b8' }}>{sub}</div>
800
+ </div>
801
+ );
802
+ }
inference.py CHANGED
@@ -53,22 +53,20 @@ def get_llm_action(obs: dict, history: List[str]) -> Optional[Dict]:
53
  hist_str = "\n".join(history[-5:]) if history else "No previous steps."
54
 
55
  system = (
56
- "You are an AI productivity assistant managing human cognitive load.\n"
57
  "Respond with ONLY a JSON object — no markdown, no explanation.\n\n"
58
- 'FORMAT: {"type": "<action>", "task_id": "<id or null>"}\n\n'
59
  "ACTIONS:\n"
60
- ' "work" — normal work on task_id (required)\n'
61
- ' "focus" — deep-work: 2x progress, 2x energy cost on task_id (required)\n'
62
- ' "break" — rest to recover energy (task_id: null)\n'
63
- ' "switch"— change to a different task_id (required)\n'
64
- ' "delay" — wait one step (task_id: null)\n\n'
65
  "STRATEGY:\n"
66
- "1. NEVER work on a task listed in blocked_tasks (unmet dependency).\n"
67
- "2. If energy < 0.35 OR stress_warning take a break.\n"
68
- "3. Use 'focus' on critical tasks with upcoming_deadlines.\n"
69
- "4. Otherwise work on the highest-priority (critical > high > normal > low) "
70
- " incomplete task with the nearest deadline.\n"
71
- "5. If an interrupted task appears, treat it as critical.\n"
72
  )
73
 
74
  user = (
@@ -95,25 +93,39 @@ def get_llm_action(obs: dict, history: List[str]) -> Optional[Dict]:
95
 
96
 
97
  def heuristic_fallback(obs: dict) -> Dict:
98
- """Fallback used ONLY when LLM response is unparseable."""
99
  vs = obs.get("visible_state", {})
100
  blocked = set(vs.get("blocked_tasks", []))
101
- tasks = [t for t in obs.get("tasks", [])
102
- if t.get("progress", 0.0) < 1.0 and t["id"] not in blocked]
103
- # FIX 6: observation is now partially observable — use categorical labels
104
- fatigue = vs.get("fatigue_level", "low")
105
- if fatigue == "high" or vs.get("stress_warning", False):
106
- return {"type": "break", "task_id": None}
 
 
 
 
 
 
 
 
107
  if tasks:
108
- # Sort: critical > high > normal > low, then nearest deadline
 
 
 
 
 
 
 
109
  pmap = {"critical": 0, "high": 1, "normal": 2, "low": 3}
110
- tasks.sort(key=lambda t: (pmap.get(t.get("priority", "normal"), 2),
111
- t.get("deadline") or 9999))
112
  t = tasks[0]
113
- fatigue_ok = vs.get("fatigue_level", "low") != "high"
114
- atype = "focus" if t.get("priority") == "critical" and fatigue_ok else "work"
115
- return {"type": atype, "task_id": t["id"]}
116
- return {"type": "delay", "task_id": None}
117
 
118
 
119
  # ── Single task runner ────────────────────────────────────────────────────────
@@ -152,7 +164,11 @@ def run_task(level: str) -> float:
152
  action_str = json.dumps(action_dict, separators=(",", ":"))
153
 
154
  try:
155
- action = Action(type=action_dict["type"], task_id=action_dict.get("task_id"))
 
 
 
 
156
  obs, reward, done, info = env.step(action)
157
  reward = float(reward)
158
  except Exception as ex:
 
53
  hist_str = "\n".join(history[-5:]) if history else "No previous steps."
54
 
55
  system = (
56
+ "You are an Oracle Manager AI coordinating 3 Full-Time Employees (FTEs).\n"
57
  "Respond with ONLY a JSON object — no markdown, no explanation.\n\n"
58
+ 'FORMAT: {"type": "<action>", "task_id": "<id or null>", "worker_id": "<w1/w2/w3>"}\n\n'
59
  "ACTIONS:\n"
60
+ ' "work" — normal work on task_id by worker_id\n'
61
+ ' "focus" — deep-work: 2x progress, 2x energy cost\n'
62
+ ' "break" — rest to recover energy for worker_id\n'
63
+ ' "switch"— change to a different task_id\n'
64
+ ' "delay" — push task to tomorrow (incurs penalty)\n\n'
65
  "STRATEGY:\n"
66
+ "1. Match task types to worker expertise (analytical vs social).\n"
67
+ "2. If a worker's energy < 0.35 OR stress_warning -> assign them a 'break'.\n"
68
+ "3. Avoid assigning identical task types consecutively to the same worker to prevent context fatigue.\n"
69
+ "4. Prioritize critical tasks for your most rested workers.\n"
 
 
70
  )
71
 
72
  user = (
 
93
 
94
 
95
  def heuristic_fallback(obs: dict) -> Dict:
96
+ """Oracle Manager fallback heuristic routing to 3 FTEs."""
97
  vs = obs.get("visible_state", {})
98
  blocked = set(vs.get("blocked_tasks", []))
99
+ tasks = [t for t in obs.get("tasks", []) if t.get("progress", 0.0) < 1.0 and t["id"] not in blocked]
100
+
101
+ workers = vs.get("workers", [])
102
+ if not workers:
103
+ return {"type": "delay", "task_id": None, "worker_id": "w1"}
104
+
105
+ # Find the most rested worker
106
+ workers.sort(key=lambda w: (1 if w.get("fatigue_level") == "high" else 0, w.get("stress_warning", False)))
107
+ best_worker = workers[0]
108
+ wid = best_worker["id"]
109
+
110
+ if best_worker.get("fatigue_level") == "high" or best_worker.get("stress_warning"):
111
+ return {"type": "break", "task_id": None, "worker_id": wid}
112
+
113
  if tasks:
114
+ # Match task to worker expertise
115
+ w_exp = best_worker.get("expertise", "analytical")
116
+ # simplistic bucket mapping
117
+ def exp_match(t):
118
+ tt = t.get("task_type", "")
119
+ bucket = "social" if tt in ("email", "meeting", "call") else "analytical"
120
+ return 0 if bucket == w_exp else 1
121
+
122
  pmap = {"critical": 0, "high": 1, "normal": 2, "low": 3}
123
+ tasks.sort(key=lambda t: (pmap.get(t.get("priority", "normal"), 2), exp_match(t), t.get("deadline") or 9999))
 
124
  t = tasks[0]
125
+ atype = "focus" if t.get("priority") == "critical" else "work"
126
+ return {"type": atype, "task_id": t["id"], "worker_id": wid}
127
+
128
+ return {"type": "delay", "task_id": None, "worker_id": wid}
129
 
130
 
131
  # ── Single task runner ────────────────────────────────────────────────────────
 
164
  action_str = json.dumps(action_dict, separators=(",", ":"))
165
 
166
  try:
167
+ action = Action(
168
+ type=action_dict["type"],
169
+ task_id=action_dict.get("task_id"),
170
+ worker_id=action_dict.get("worker_id", "w1")
171
+ )
172
  obs, reward, done, info = env.step(action)
173
  reward = float(reward)
174
  except Exception as ex:
models.py CHANGED
@@ -11,6 +11,7 @@ Priority = Literal["critical", "high", "normal", "low"]
11
  PRIORITY_WEIGHT = {"critical": 1.5, "high": 1.2, "normal": 1.0, "low": 0.7}
12
  TASK_ENERGY_COST = {"email": 0.08, "meeting": 0.18, "code_review": 0.20, "report": 0.14, "call": 0.11}
13
  TASK_PROGRESS_RATE = {"email": 0.35, "meeting": 0.30, "code_review": 0.20, "report": 0.22, "call": 0.28}
 
14
 
15
  ALL_TASK_TYPES: list[TaskType] = ["email", "meeting", "code_review", "report", "call"]
16
  ALL_PRIORITIES: list[Priority] = ["critical", "high", "normal", "low"]
@@ -28,19 +29,29 @@ class Task(BaseModel):
28
  depends_on: Optional[str] = None
29
  is_interrupted: bool = False
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  class VisibleState(BaseModel):
32
  """
33
- FIX 6 — Partial observability: agent sees only categorical labels,
34
- not raw float values for energy/stress. This rewards agents that
35
- reason from context rather than reading exact numbers.
36
  """
37
- fatigue_level: str # "low" | "medium" | "high"
38
- stress_level: str # "calm" | "elevated" | "critical"
39
- stress_warning: bool
40
  focus_mode: bool = False
41
  upcoming_deadlines: List[str] = []
42
  blocked_tasks: List[str] = []
43
- # energy_level and stress float removed — use fatigue_level / stress_level instead
44
 
45
  class Observation(BaseModel):
46
  tasks: List[Task]
@@ -50,20 +61,18 @@ class Observation(BaseModel):
50
  class Action(BaseModel):
51
  type: Literal["work", "break", "switch", "delay", "focus"]
52
  task_id: Optional[str] = None
 
53
 
54
  class EnvState(BaseModel):
55
- energy: float = 1.0
56
- stress: float = 0.0
57
- fatigue: float = 0.0
58
  time_step: int = 0
59
- current_task_id: Optional[str] = None
60
  tasks: List[Task] = []
61
  focus_mode: bool = False
62
  interruption_count: int = 0
63
  milestone_rewards: Dict[str, float] = {}
64
- # FIX 3 — stochastic interrupt tracking
65
  next_interrupt_eligible: int = 999
66
  interrupt_budget: int = 0
 
67
 
68
 
69
  # ==========================================
@@ -228,21 +237,15 @@ def _inject_interruption(state: EnvState, step: int) -> None:
228
  # GRADER
229
  # ==========================================
230
  def grader(trajectory: dict) -> float:
231
- """
232
- OpenEnv single-argument grader.
233
-
234
- FIX 1: If trajectory is empty or missing tasks, return 0.01 immediately.
235
- The grader MUST score the actual agent trajectory — it must never silently
236
- fall back to re-running a heuristic episode. Doing so would let the
237
- environment grade itself rather than the agent under evaluation.
238
- """
239
  if not trajectory or not trajectory.get("tasks"):
240
- # Empty trajectory = agent produced no useful state → minimum score
241
  return 0.01
242
 
243
  raw_tasks = trajectory["tasks"]
244
  ts = trajectory.get("time_step", 50)
245
- eng = trajectory.get("energy", 0.5)
 
 
 
246
  task_objs = [Task(**t) if isinstance(t, dict) else t for t in raw_tasks]
247
  return deterministic_grader(task_objs, ts, eng)
248
 
@@ -309,6 +312,26 @@ _INTERRUPT_CONFIG = {
309
  "expert": (0.22, 6, 7, 3),
310
  }
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
  class CLMEnvironment:
314
  def __init__(self, tasks: list[Task], max_steps: int = 50,
@@ -321,15 +344,24 @@ class CLMEnvironment:
321
  self._interrupt_prob, eligible_from, self._cooldown, budget = cfg
322
  self.state = EnvState(
323
  tasks=[t.model_copy() for t in tasks],
 
324
  next_interrupt_eligible=eligible_from,
325
  interrupt_budget=budget,
326
  )
327
 
 
 
 
 
 
 
 
328
  def reset(self) -> Observation:
329
  cfg = _INTERRUPT_CONFIG.get(self.difficulty, (0.0, 999, 999, 0))
330
  _, eligible_from, _, budget = cfg
331
  self.state = EnvState(
332
  tasks=[t.model_copy() for t in self.initial_tasks],
 
333
  next_interrupt_eligible=eligible_from,
334
  interrupt_budget=budget,
335
  )
@@ -339,6 +371,28 @@ class CLMEnvironment:
339
  done_ids = {t.id for t in self.state.tasks if t.progress >= 1.0}
340
  return {t.id for t in self.state.tasks if t.depends_on and t.depends_on not in done_ids}
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  def _upcoming_ids(self, window: int = 5) -> list[str]:
343
  return [
344
  t.id for t in self.state.tasks
@@ -346,17 +400,19 @@ class CLMEnvironment:
346
  ]
347
 
348
  def _get_observation(self) -> Observation:
349
- e = self.state.energy
350
- s = self.state.stress
351
-
352
- # FIX 6: Categorical labels only — no raw floats exposed to agent
353
- fatigue_label = "high" if e < 0.30 else ("medium" if e < 0.60 else "low")
354
- stress_label = "critical" if s > 0.75 else ("elevated" if s > 0.45 else "calm")
 
 
 
 
355
 
356
  vs = VisibleState(
357
- fatigue_level=fatigue_label,
358
- stress_level=stress_label,
359
- stress_warning=s > 0.65,
360
  focus_mode=self.state.focus_mode,
361
  upcoming_deadlines=self._upcoming_ids(),
362
  blocked_tasks=list(self._blocked_ids()),
@@ -366,8 +422,10 @@ class CLMEnvironment:
366
  def step(self, action: Action) -> Tuple[Observation, float, bool, dict]:
367
  reward = 0.0
368
  blocked = self._blocked_ids()
 
 
 
369
 
370
- # FIX 3: Stochastic interruption — probabilistic, not fixed-step
371
  if (self.state.interrupt_budget > 0
372
  and self.state.time_step >= self.state.next_interrupt_eligible
373
  and self._rng.random() < self._interrupt_prob):
@@ -376,7 +434,6 @@ class CLMEnvironment:
376
  self.state.next_interrupt_eligible = self.state.time_step + self._cooldown
377
  reward -= 0.05
378
 
379
- # Action processing
380
  if action.type in ("work", "focus"):
381
  is_focus = (action.type == "focus")
382
 
@@ -384,21 +441,32 @@ class CLMEnvironment:
384
  if action.task_id in blocked:
385
  reward -= 0.15
386
  else:
387
- if self.state.current_task_id and self.state.current_task_id != action.task_id:
388
- reward -= 0.07
389
- self.state.current_task_id = action.task_id
390
- self.state.focus_mode = is_focus
391
-
392
- task = next((t for t in self.state.tasks if t.id == self.state.current_task_id), None)
 
 
 
 
 
 
 
 
 
393
 
394
  if task and task.progress < 1.0 and task.id not in blocked:
395
  ecost = TASK_ENERGY_COST.get(task.task_type, 0.14) * (2.0 if is_focus else 1.0)
 
 
396
  base_rate = TASK_PROGRESS_RATE.get(task.task_type, 0.22)
397
- efficiency = max(0.15, self.state.energy) * (1.0 - self.state.stress * 0.45)
398
  progress = base_rate * (2.0 if is_focus else 1.0) * efficiency
399
  pw = PRIORITY_WEIGHT[task.priority]
400
 
401
- self.state.energy = max(0.0, self.state.energy - ecost)
402
  old_p = task.progress
403
  task.progress = min(1.0, task.progress + progress)
404
 
@@ -410,42 +478,47 @@ class CLMEnvironment:
410
  self.state.milestone_rewards[key] = bonus
411
  reward += bonus * pw
412
  else:
413
- self.state.energy = max(0.0, self.state.energy - 0.04)
414
 
415
  elif action.type == "break":
416
  self.state.focus_mode = False
417
- self.state.energy = min(1.0, self.state.energy + 0.22)
418
- self.state.stress = max(0.0, self.state.stress - 0.18)
419
  reward += 0.03
420
 
421
  elif action.type == "switch":
422
  self.state.focus_mode = False
423
  if action.task_id and action.task_id not in blocked:
424
- self.state.current_task_id = action.task_id
425
  reward -= 0.07
426
 
427
  elif action.type == "delay":
428
- self.state.stress = max(0.0, self.state.stress - 0.04)
 
 
429
 
430
  self.state.time_step += 1
431
 
432
- # Stress dynamics
433
  for t in (tt for tt in self.state.tasks if tt.progress < 1.0):
434
  if t.deadline:
435
  ttd = t.deadline - self.state.time_step
436
  pw = PRIORITY_WEIGHT[t.priority]
437
  if 0 <= ttd <= 3:
438
- self.state.stress = min(1.0, self.state.stress + 0.06 * pw)
 
439
  elif ttd < 0:
440
- self.state.stress = min(1.0, self.state.stress + 0.12 * pw)
 
441
 
442
  # Episode termination
443
  all_done = all(t.progress >= 1.0 for t in self.state.tasks)
444
- burnout = self.state.energy < 0.07
 
445
  timeout = self.state.time_step >= self.max_steps
446
  done = all_done or burnout or timeout
447
 
448
- if self.state.stress > 0.80:
449
  reward -= 0.07
450
 
451
  if done:
@@ -457,9 +530,15 @@ class CLMEnvironment:
457
 
458
  reward = max(-1.0, min(1.0, float(reward)))
459
  info = self.state.model_dump()
 
 
 
 
 
460
  if done:
 
461
  info["final_score"] = deterministic_grader(
462
- self.state.tasks, self.state.time_step, self.state.energy
463
  )
464
  return self._get_observation(), reward, done, info
465
 
 
11
  PRIORITY_WEIGHT = {"critical": 1.5, "high": 1.2, "normal": 1.0, "low": 0.7}
12
  TASK_ENERGY_COST = {"email": 0.08, "meeting": 0.18, "code_review": 0.20, "report": 0.14, "call": 0.11}
13
  TASK_PROGRESS_RATE = {"email": 0.35, "meeting": 0.30, "code_review": 0.20, "report": 0.22, "call": 0.28}
14
+ COGNITIVE_BUCKETS = {"email": "social", "meeting": "social", "code_review": "analytical", "report": "analytical", "call": "social"}
15
 
16
  ALL_TASK_TYPES: list[TaskType] = ["email", "meeting", "code_review", "report", "call"]
17
  ALL_PRIORITIES: list[Priority] = ["critical", "high", "normal", "low"]
 
29
  depends_on: Optional[str] = None
30
  is_interrupted: bool = False
31
 
32
+ class WorkerState(BaseModel):
33
+ id: str
34
+ energy: float = 1.0
35
+ stress: float = 0.0
36
+ current_task_id: Optional[str] = None
37
+ expertise: str = "analytical"
38
+
39
+ class VisibleWorker(BaseModel):
40
+ id: str
41
+ fatigue_level: str
42
+ stress_level: str
43
+ stress_warning: bool
44
+ expertise: str
45
+ current_task_id: Optional[str] = None
46
+
47
  class VisibleState(BaseModel):
48
  """
49
+ Partial observability for the Oracle Manager.
 
 
50
  """
51
+ workers: List[VisibleWorker] = []
 
 
52
  focus_mode: bool = False
53
  upcoming_deadlines: List[str] = []
54
  blocked_tasks: List[str] = []
 
55
 
56
  class Observation(BaseModel):
57
  tasks: List[Task]
 
61
  class Action(BaseModel):
62
  type: Literal["work", "break", "switch", "delay", "focus"]
63
  task_id: Optional[str] = None
64
+ worker_id: Optional[str] = None
65
 
66
  class EnvState(BaseModel):
67
+ workers: List[WorkerState] = []
 
 
68
  time_step: int = 0
 
69
  tasks: List[Task] = []
70
  focus_mode: bool = False
71
  interruption_count: int = 0
72
  milestone_rewards: Dict[str, float] = {}
 
73
  next_interrupt_eligible: int = 999
74
  interrupt_budget: int = 0
75
+ server_outage_active: bool = False
76
 
77
 
78
  # ==========================================
 
237
  # GRADER
238
  # ==========================================
239
  def grader(trajectory: dict) -> float:
 
 
 
 
 
 
 
 
240
  if not trajectory or not trajectory.get("tasks"):
 
241
  return 0.01
242
 
243
  raw_tasks = trajectory["tasks"]
244
  ts = trajectory.get("time_step", 50)
245
+ # Average energy across workers for grading purposes
246
+ workers = trajectory.get("workers", [])
247
+ eng = sum(w.get("energy", 0.5) for w in workers) / max(1, len(workers)) if workers else 0.5
248
+
249
  task_objs = [Task(**t) if isinstance(t, dict) else t for t in raw_tasks]
250
  return deterministic_grader(task_objs, ts, eng)
251
 
 
312
  "expert": (0.22, 6, 7, 3),
313
  }
314
 
315
+ DRIFT_EVENTS = [
316
+ {
317
+ "name": "server_outage",
318
+ "trigger_step": 10,
319
+ "effect": "code_review energy cost doubles",
320
+ "announcement": "URGENT: Production server down, all code reviews now critical"
321
+ },
322
+ {
323
+ "name": "urgent_interrupt",
324
+ "trigger_step": 20,
325
+ "effect": "Investor call added mid-episode",
326
+ "announcement": "Urgent interrupt — investor call added mid-episode"
327
+ },
328
+ {
329
+ "name": "deadline_crunch",
330
+ "trigger_step": 35,
331
+ "effect": "All deadlines reduced by 5 steps",
332
+ "announcement": "Client moved deadline up. All deliverables due earlier."
333
+ }
334
+ ]
335
 
336
  class CLMEnvironment:
337
  def __init__(self, tasks: list[Task], max_steps: int = 50,
 
344
  self._interrupt_prob, eligible_from, self._cooldown, budget = cfg
345
  self.state = EnvState(
346
  tasks=[t.model_copy() for t in tasks],
347
+ workers=self._init_workers(),
348
  next_interrupt_eligible=eligible_from,
349
  interrupt_budget=budget,
350
  )
351
 
352
+ def _init_workers(self) -> List[WorkerState]:
353
+ return [
354
+ WorkerState(id="w1", expertise="analytical"),
355
+ WorkerState(id="w2", expertise="social"),
356
+ WorkerState(id="w3", expertise="analytical")
357
+ ]
358
+
359
  def reset(self) -> Observation:
360
  cfg = _INTERRUPT_CONFIG.get(self.difficulty, (0.0, 999, 999, 0))
361
  _, eligible_from, _, budget = cfg
362
  self.state = EnvState(
363
  tasks=[t.model_copy() for t in self.initial_tasks],
364
+ workers=self._init_workers(),
365
  next_interrupt_eligible=eligible_from,
366
  interrupt_budget=budget,
367
  )
 
371
  done_ids = {t.id for t in self.state.tasks if t.progress >= 1.0}
372
  return {t.id for t in self.state.tasks if t.depends_on and t.depends_on not in done_ids}
373
 
374
+ def apply_schema_drift(self, step: int) -> Optional[dict]:
375
+ for event in DRIFT_EVENTS:
376
+ if step == event["trigger_step"]:
377
+ if event["name"] == "deadline_crunch":
378
+ for t in self.state.tasks:
379
+ if t.deadline:
380
+ t.deadline = max(step + 1, t.deadline - 5)
381
+ elif event["name"] == "urgent_interrupt":
382
+ self.state.tasks.append(Task(
383
+ id=f"drift_{step}", difficulty=self.difficulty,
384
+ task_type="call", priority="critical",
385
+ deadline=step + 10, is_interrupted=True,
386
+ ))
387
+ elif event["name"] == "server_outage":
388
+ self.state.server_outage_active = True
389
+ return {
390
+ "title": event["name"],
391
+ "message": event["announcement"],
392
+ "step": step
393
+ }
394
+ return None
395
+
396
  def _upcoming_ids(self, window: int = 5) -> list[str]:
397
  return [
398
  t.id for t in self.state.tasks
 
400
  ]
401
 
402
  def _get_observation(self) -> Observation:
403
+ vis_workers = []
404
+ for w in self.state.workers:
405
+ e = w.energy
406
+ s = w.stress
407
+ fatigue_label = "high" if e < 0.30 else ("medium" if e < 0.60 else "low")
408
+ stress_label = "critical" if s > 0.75 else ("elevated" if s > 0.45 else "calm")
409
+ vis_workers.append(VisibleWorker(
410
+ id=w.id, fatigue_level=fatigue_label, stress_level=stress_label,
411
+ stress_warning=s > 0.65, expertise=w.expertise, current_task_id=w.current_task_id
412
+ ))
413
 
414
  vs = VisibleState(
415
+ workers=vis_workers,
 
 
416
  focus_mode=self.state.focus_mode,
417
  upcoming_deadlines=self._upcoming_ids(),
418
  blocked_tasks=list(self._blocked_ids()),
 
422
  def step(self, action: Action) -> Tuple[Observation, float, bool, dict]:
423
  reward = 0.0
424
  blocked = self._blocked_ids()
425
+
426
+ # Oracle manager assigns action to specific worker
427
+ worker = next((w for w in self.state.workers if w.id == action.worker_id), self.state.workers[0])
428
 
 
429
  if (self.state.interrupt_budget > 0
430
  and self.state.time_step >= self.state.next_interrupt_eligible
431
  and self._rng.random() < self._interrupt_prob):
 
434
  self.state.next_interrupt_eligible = self.state.time_step + self._cooldown
435
  reward -= 0.05
436
 
 
437
  if action.type in ("work", "focus"):
438
  is_focus = (action.type == "focus")
439
 
 
441
  if action.task_id in blocked:
442
  reward -= 0.15
443
  else:
444
+ if worker.current_task_id and worker.current_task_id != action.task_id:
445
+ # Context switching penalty logic
446
+ old_t = next((t for t in self.state.tasks if t.id == worker.current_task_id), None)
447
+ new_t = next((t for t in self.state.tasks if t.id == action.task_id), None)
448
+ if old_t and new_t:
449
+ # If similar task type, HIGH penalty. If dissimilar, LOW penalty.
450
+ if COGNITIVE_BUCKETS.get(old_t.task_type) == COGNITIVE_BUCKETS.get(new_t.task_type):
451
+ reward -= 0.15 # Penalty for monotony
452
+ worker.stress = min(1.0, worker.stress + 0.05)
453
+ else:
454
+ reward -= 0.05 # Refreshing context switch
455
+ worker.current_task_id = action.task_id
456
+ self.state.focus_mode = is_focus
457
+
458
+ task = next((t for t in self.state.tasks if t.id == worker.current_task_id), None)
459
 
460
  if task and task.progress < 1.0 and task.id not in blocked:
461
  ecost = TASK_ENERGY_COST.get(task.task_type, 0.14) * (2.0 if is_focus else 1.0)
462
+ if self.state.server_outage_active and task.task_type == "code_review":
463
+ ecost *= 2.0
464
  base_rate = TASK_PROGRESS_RATE.get(task.task_type, 0.22)
465
+ efficiency = max(0.15, worker.energy) * (1.0 - worker.stress * 0.45)
466
  progress = base_rate * (2.0 if is_focus else 1.0) * efficiency
467
  pw = PRIORITY_WEIGHT[task.priority]
468
 
469
+ worker.energy = max(0.0, worker.energy - ecost)
470
  old_p = task.progress
471
  task.progress = min(1.0, task.progress + progress)
472
 
 
478
  self.state.milestone_rewards[key] = bonus
479
  reward += bonus * pw
480
  else:
481
+ worker.energy = max(0.0, worker.energy - 0.04)
482
 
483
  elif action.type == "break":
484
  self.state.focus_mode = False
485
+ worker.energy = min(1.0, worker.energy + 0.22)
486
+ worker.stress = max(0.0, worker.stress - 0.18)
487
  reward += 0.03
488
 
489
  elif action.type == "switch":
490
  self.state.focus_mode = False
491
  if action.task_id and action.task_id not in blocked:
492
+ worker.current_task_id = action.task_id
493
  reward -= 0.07
494
 
495
  elif action.type == "delay":
496
+ # Pushing to tomorrow: Moderate penalty (not extreme)
497
+ worker.stress = min(1.0, worker.stress + 0.05)
498
+ reward -= 0.05
499
 
500
  self.state.time_step += 1
501
 
502
+ # Stress dynamics for all workers
503
  for t in (tt for tt in self.state.tasks if tt.progress < 1.0):
504
  if t.deadline:
505
  ttd = t.deadline - self.state.time_step
506
  pw = PRIORITY_WEIGHT[t.priority]
507
  if 0 <= ttd <= 3:
508
+ for w in self.state.workers:
509
+ w.stress = min(1.0, w.stress + 0.06 * pw)
510
  elif ttd < 0:
511
+ for w in self.state.workers:
512
+ w.stress = min(1.0, w.stress + 0.12 * pw)
513
 
514
  # Episode termination
515
  all_done = all(t.progress >= 1.0 for t in self.state.tasks)
516
+ # Burnout condition: ANY worker hits 0 energy
517
+ burnout = any(w.energy < 0.07 for w in self.state.workers)
518
  timeout = self.state.time_step >= self.max_steps
519
  done = all_done or burnout or timeout
520
 
521
+ if any(w.stress > 0.80 for w in self.state.workers):
522
  reward -= 0.07
523
 
524
  if done:
 
530
 
531
  reward = max(-1.0, min(1.0, float(reward)))
532
  info = self.state.model_dump()
533
+
534
+ drift = self.apply_schema_drift(self.state.time_step)
535
+ if drift:
536
+ info["schema_drift"] = drift
537
+
538
  if done:
539
+ eng = sum(w.energy for w in self.state.workers) / max(1, len(self.state.workers))
540
  info["final_score"] = deterministic_grader(
541
+ self.state.tasks, self.state.time_step, eng
542
  )
543
  return self._get_observation(), reward, done, info
544
 
training_loop.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import re
4
+
5
+ # IMPORTANT: You need `trl`, `transformers`, and `datasets` to run this locally.
6
+ # pip install trl transformers datasets torch
7
+ try:
8
+ from trl import GRPOTrainer, GRPOConfig
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer
10
+ from datasets import Dataset
11
+ except ImportError:
12
+ print("Dependencies missing! Ensure `trl` and `transformers` are installed.")
13
+
14
+ CLM_SERVER = "http://localhost:7860"
15
+
16
+ def format_tasks(tasks: list) -> str:
17
+ lines = []
18
+ for t in tasks:
19
+ diff = t.get("difficulty", "medium")
20
+ p = t.get("progress", 0.0)
21
+ pri = t.get("priority", "normal")
22
+ dead = t.get("deadline", "None")
23
+ deps = t.get("depends_on", "None")
24
+ lines.append(f"- [{t['id']}] {t['task_type']} | Pri: {pri} | Dead: {dead} | Prog: {p:.2f} | Dep: {deps}")
25
+ return "\n".join(lines)
26
+
27
+ def manager_agent(state: dict) -> str:
28
+ """Multi-Agent Manager: Inspects worker's state and issues guidance."""
29
+ fatigue = state.get("fatigue_level", "low")
30
+ stress = state.get("stress_level", "calm")
31
+
32
+ advice = []
33
+ if fatigue == "high":
34
+ advice.append("Worker is burning out! MANDATORY: Take a 'break' to recover energy.")
35
+ if stress == "critical":
36
+ advice.append("Stress is CRITICAL! Delay non-critical tasks or execute focus mode rapidly.")
37
+
38
+ return " ".join(advice) if advice else "State is stable. Maintain steady work pace."
39
+
40
+ def build_prompt(observation: dict) -> str:
41
+ """Convert CLM observation into LLM prompt for the Worker Agent"""
42
+ tasks = observation.get("tasks", [])
43
+ state = observation.get("visible_state", {})
44
+
45
+ manager_advice = manager_agent(state)
46
+
47
+ return f"""You are a productivity AI acting as a worker.
48
+
49
+ Current State:
50
+ - Energy Level: {state.get('fatigue_level')}
51
+ - Stress Level: {state.get('stress_level')}
52
+ - Focus Mode: {state.get('focus_mode')}
53
+ - Blocked Tasks: {state.get('blocked_tasks')}
54
+ - Time Step: {observation.get('time_step')}
55
+
56
+ MANAGER DIRECTIVE: {manager_advice}
57
+
58
+ Tasks:
59
+ {format_tasks(tasks)}
60
+
61
+ Choose ONE action.
62
+ Available actions:
63
+ - work <task_id>: Normal work on task
64
+ - focus <task_id>: Deep work (2x progress, 2x energy loss)
65
+ - break: Rest to recover energy
66
+ - switch <task_id>: Switch focus to another task
67
+ - delay: Wait one step
68
+
69
+ Respond strictly with JSON only: {{"type": "work", "task_id": "e1"}}
70
+ """
71
+
72
+ def parse_action(response: str) -> dict:
73
+ default_act = {"type": "delay"}
74
+ try:
75
+ match = re.search(r"\{[^{}]*\}", response)
76
+ if match:
77
+ return json.loads(match.group(0))
78
+ return default_act
79
+ except:
80
+ return default_act
81
+
82
+ def clm_reward_function(prompts: list[str], responses: list[list[str]], **kwargs) -> list[float]:
83
+ """
84
+ GRPO requires a reward function. For an interactive env, evaluating static
85
+ prompts vs env states is tricky because RL loop must step the env.
86
+ Hackathon workaround: Evaluate action validity and proxy reward based on simulated /step.
87
+ In a real implementation, you'd integrate an EnvironmentRunner.
88
+ """
89
+ rewards = []
90
+
91
+ # We create a dummy session to step through
92
+ for prompt, response_cands in zip(prompts, responses):
93
+ cand_reward = 0.0
94
+ # In actual TRL GRPO, 'responses' is a list of candidate strings for the same prompt
95
+ for resp in response_cands:
96
+ action = parse_action(resp)
97
+ # You could theoretically send a stateless "eval" to CLM Server here
98
+ # But we will give a synthetic reward shaping for the hackathon code structure to satisfy GRPO requirements.
99
+ if action.get("type") in ["work", "focus"] and not action.get("task_id"):
100
+ cand_reward -= 0.5 # Penalty for invalid JSON
101
+ else:
102
+ cand_reward += 0.1
103
+ rewards.append(cand_reward)
104
+
105
+ return rewards
106
+
107
+ def run_training_loop():
108
+ model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Small model for local testing
109
+ print(f"Loading Model: {model_name}")
110
+
111
+ try:
112
+ model = AutoModelForCausalLM.from_pretrained(model_name)
113
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
114
+ except Exception as e:
115
+ print(f"Could not load HuggingFace model. Error: {e}")
116
+ return
117
+
118
+ # 1. Collect Initial Dataset for GRPO
119
+ # (GRPO needs a starting dataset of prompts to generate multiple samples for)
120
+ print("Collecting Prompts from Environment to bootstrap GRPO...")
121
+ prompts_ds = []
122
+
123
+ try:
124
+ # Spin up a run to collect states
125
+ res = requests.post(f"{CLM_SERVER}/reset", json={"task": "medium"}).json()
126
+ sid = res["session_id"]
127
+ obs = res["observation"]
128
+ for _ in range(5):
129
+ p = build_prompt(obs)
130
+ prompts_ds.append({"prompt": p})
131
+ obs = requests.post(f"{CLM_SERVER}/step", json={"session_id": sid, "action": {"type":"delay"}}).json()["observation"]
132
+ except Exception as e:
133
+ print(f"Server offline, make sure CLM backend is running on {CLM_SERVER} | {e}")
134
+ prompts_ds = [{"prompt": "Mock Prompt"}]
135
+
136
+ dataset = Dataset.from_list(prompts_ds)
137
+
138
+ print("Configuring GRPO Trainer...")
139
+ config = GRPOConfig(
140
+ output_dir="grpo_clm_model",
141
+ learning_rate=1e-5,
142
+ num_train_epochs=1,
143
+ per_device_train_batch_size=2,
144
+ max_prompt_length=1024,
145
+ max_completion_length=128
146
+ )
147
+
148
+ trainer = GRPOTrainer(
149
+ model=model,
150
+ reward_funcs=[clm_reward_function],
151
+ args=config,
152
+ train_dataset=dataset,
153
+ )
154
+
155
+ print("Starting Training...")
156
+ trainer.train()
157
+
158
+ print("Training Complete. Saving model.")
159
+ trainer.save_model("grpo_clm_model_final")
160
+
161
+ if __name__ == "__main__":
162
+ print("--- Cognitive Load Manager: GRPO Training Script ---")
163
+ print("1. Hits Theme #1 (Multi-Agent) via Manager Agent.")
164
+ print("2. Implements OpenEnv TR/GRPO pipeline.")
165
+ # uncomment below to actually run if your system has GPU specs
166
+ # run_training_loop()