steinad commited on
Commit
adca48b
·
0 Parent(s):

Initial commit

Browse files
Files changed (46) hide show
  1. .gitignore +167 -0
  2. LICENSE +21 -0
  3. MANIFEST.in +9 -0
  4. README.md +326 -0
  5. digest.txt +0 -0
  6. pyproject.toml +91 -0
  7. requirements.txt +8 -0
  8. src/pips/__init__.py +17 -0
  9. src/pips/__main__.py +60 -0
  10. src/pips/core.py +991 -0
  11. src/pips/gradio_app.py +911 -0
  12. src/pips/model_registry.py +81 -0
  13. src/pips/models.py +734 -0
  14. src/pips/prompts.py +67 -0
  15. src/pips/saved_examples/movies.json +153 -0
  16. src/pips/saved_examples/time_arithmetic.json +306 -0
  17. src/pips/saved_examples/word_sorting.json +462 -0
  18. src/pips/static/README.md +163 -0
  19. src/pips/static/css/base.css +51 -0
  20. src/pips/static/css/components/buttons.css +119 -0
  21. src/pips/static/css/components/chat.css +344 -0
  22. src/pips/static/css/components/feedback.css +1260 -0
  23. src/pips/static/css/components/forms.css +283 -0
  24. src/pips/static/css/components/modal.css +193 -0
  25. src/pips/static/css/components/panels.css +84 -0
  26. src/pips/static/css/components/responsive.css +123 -0
  27. src/pips/static/css/components/sessions.css +523 -0
  28. src/pips/static/css/components/utilities.css +395 -0
  29. src/pips/static/css/main.css +18 -0
  30. src/pips/static/css/tokens.css +45 -0
  31. src/pips/static/default_sessions/builtin_sessions.json +80 -0
  32. src/pips/static/js/core/logger.js +32 -0
  33. src/pips/static/js/core/state.js +174 -0
  34. src/pips/static/js/core/storage.js +398 -0
  35. src/pips/static/js/handlers/socket-handlers.js +442 -0
  36. src/pips/static/js/main.js +250 -0
  37. src/pips/static/js/network/socket.js +198 -0
  38. src/pips/static/js/ui/dom-manager.js +149 -0
  39. src/pips/static/js/ui/image-handler.js +360 -0
  40. src/pips/static/js/ui/interactive-feedback.js +955 -0
  41. src/pips/static/js/ui/message-manager.js +727 -0
  42. src/pips/static/js/ui/session-manager.js +1451 -0
  43. src/pips/static/js/ui/settings-manager.js +504 -0
  44. src/pips/templates/index_modular.html +337 -0
  45. src/pips/utils.py +131 -0
  46. src/pips/web_app.py +571 -0
.gitignore ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+
164
+ data
165
+ logs
166
+
167
+ .env
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 PIPS Development Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
MANIFEST.in ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt
4
+ recursive-include src/pips/templates *
5
+ recursive-include src/pips/static *.css *.js *.json *.html
6
+ recursive-include src/pips/static/default_sessions *.json
7
+ global-exclude *.pyc
8
+ global-exclude __pycache__
9
+ global-exclude .DS_Store
README.md ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIPS: Python Iterative Problem Solving
2
+
3
+ **PIPS** (Python Iterative Problem Solving) is a powerful library for iterative code generation and refinement using Large Language Models (LLMs). It provides both programmatic APIs and a web interface for solving complex problems through iterative reasoning and code execution.
4
+
5
+ ## Features
6
+
7
+ - 🤖 **Multi-LLM Support**: Works with OpenAI GPT, Anthropic Claude, and Google GenAI models
8
+ - 🔄 **Iterative Problem Solving**: Automatically refines solutions through multiple iterations
9
+ - 🧠 **Two Solving Modes**: Chain-of-thought reasoning and code-based problem solving
10
+ - 🌐 **Web Interface**: Beautiful Flask-SocketIO web UI for interactive problem solving
11
+ - 📊 **Image Support**: Process problems with both text and image inputs
12
+ - ⚡ **Streaming Support**: Real-time token streaming for responsive user experience
13
+ - 🛡️ **Safe Code Execution**: Sandboxed code execution with timeouts and error handling
14
+ - 💾 **Session Management**: Persistent browser storage with import/export capabilities
15
+ - 📁 **Curated Examples**: Automatically loads demonstration sessions on first launch
16
+
17
+ ## Installation
18
+
19
+ ### From PyPI (when available)
20
+ ```bash
21
+ pip install pips-solver
22
+ ```
23
+
24
+ ### From Source
25
+ ```bash
26
+ git clone <repository-url>
27
+ cd pips
28
+ pip install -e .
29
+ ```
30
+
31
+ ### With Optional Dependencies
32
+ ```bash
33
+ # For web interface
34
+ pip install pips-solver[web]
35
+
36
+ # For development
37
+ pip install pips-solver[dev]
38
+
39
+ # All optional dependencies
40
+ pip install pips-solver[all]
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ### 1. Command Line Interface
46
+
47
+ Start the web interface:
48
+ ```bash
49
+ pips
50
+ # or
51
+ python -m pips
52
+
53
+ # Custom host and port
54
+ pips --host 127.0.0.1 --port 5000 --debug
55
+ ```
56
+
57
+ ### 2. Programmatic Usage
58
+
59
+ ```python
60
+ from pips import PIPSSolver, get_model
61
+ from pips.utils import RawInput
62
+
63
+ # Initialize a model
64
+ model = get_model("gpt-4o", api_key="your-openai-api-key")
65
+
66
+ # Create solver
67
+ solver = PIPSSolver(
68
+ model=model,
69
+ max_iterations=8,
70
+ temperature=0.0
71
+ )
72
+
73
+ # Solve a problem
74
+ problem = RawInput(
75
+ text_input="What is the sum of the first 10 prime numbers?",
76
+ image_input=None
77
+ )
78
+
79
+ # Chain of thought solving
80
+ answer, logs = solver.solve_chain_of_thought(problem)
81
+ print(f"Answer: {answer}")
82
+
83
+ # Code-based solving
84
+ answer, logs = solver.solve_with_code(problem)
85
+ print(f"Answer: {answer}")
86
+ ```
87
+
88
+ ### 3. Streaming Usage
89
+
90
+ ```python
91
+ def on_token(token, iteration, model_name):
92
+ print(f"Token: {token}", end="", flush=True)
93
+
94
+ def on_step(step, message, **kwargs):
95
+ print(f"Step {step}: {message}")
96
+
97
+ callbacks = {
98
+ "on_llm_streaming_token": on_token,
99
+ "on_step_update": on_step
100
+ }
101
+
102
+ # Solve with streaming
103
+ answer, logs = solver.solve_with_code(
104
+ problem,
105
+ stream=True,
106
+ callbacks=callbacks
107
+ )
108
+ ```
109
+
110
+ ## Supported Models
111
+
112
+ ### OpenAI Models
113
+ - GPT-4o, GPT-4o-mini
114
+ - GPT-4, GPT-4-turbo
115
+ - GPT-3.5-turbo
116
+ - O1-preview, O1-mini
117
+ - O3-mini (when available)
118
+
119
+ ### Anthropic Models
120
+ - Claude-3.5-sonnet
121
+ - Claude-3-opus, Claude-3-sonnet, Claude-3-haiku
122
+ - Claude-2.1, Claude-2.0
123
+
124
+ ### Google Models
125
+ - Gemini-2.0-flash-exp
126
+ - Gemini-1.5-pro, Gemini-1.5-flash
127
+ - Gemini-1.0-pro
128
+
129
+ ## API Reference
130
+
131
+ ### PIPSSolver
132
+
133
+ The main solver class for iterative problem solving.
134
+
135
+ ```python
136
+ PIPSSolver(
137
+ model: LLMModel,
138
+ max_iterations: int = 8,
139
+ temperature: float = 0.0,
140
+ max_tokens: int = 4096,
141
+ top_p: float = 1.0
142
+ )
143
+ ```
144
+
145
+ #### Methods
146
+
147
+ - `solve_chain_of_thought(sample, stream=False, callbacks=None)`: Solve using chain-of-thought reasoning
148
+ - `solve_with_code(sample, stream=False, callbacks=None)`: Solve using iterative code generation
149
+
150
+ ### Model Factory
151
+
152
+ ```python
153
+ from pips import get_model
154
+
155
+ # Get a model instance
156
+ model = get_model(model_name, api_key=None)
157
+ ```
158
+
159
+ ### Utilities
160
+
161
+ ```python
162
+ from pips.utils import RawInput, img2base64, base642img
163
+
164
+ # Create input with text and optional image
165
+ input_data = RawInput(
166
+ text_input="Your question here",
167
+ image_input=PIL.Image.open("image.jpg") # Optional
168
+ )
169
+ ```
170
+
171
+ ## Configuration
172
+
173
+ ### Environment Variables
174
+
175
+ Set your API keys as environment variables:
176
+
177
+ ```bash
178
+ export OPENAI_API_KEY="your-openai-key"
179
+ export ANTHROPIC_API_KEY="your-anthropic-key"
180
+ export GOOGLE_API_KEY="your-google-key"
181
+ ```
182
+
183
+ ### Web Interface Settings
184
+
185
+ The web interface allows you to configure:
186
+ - Model selection
187
+ - API keys
188
+ - Solving mode (chain-of-thought vs code)
189
+ - Temperature, max tokens, iterations
190
+ - Code execution timeout
191
+
192
+ ## Examples
193
+
194
+ ### Mathematical Problem
195
+ ```python
196
+ problem = RawInput(
197
+ text_input="Find the derivative of f(x) = x^3 + 2x^2 - 5x + 1",
198
+ image_input=None
199
+ )
200
+ answer, logs = solver.solve_with_code(problem)
201
+ ```
202
+
203
+ ### Image-Based Problem
204
+ ```python
205
+ from PIL import Image
206
+
207
+ image = Image.open("chart.png")
208
+ problem = RawInput(
209
+ text_input="What is the trend shown in this chart?",
210
+ image_input=image
211
+ )
212
+ answer, logs = solver.solve_chain_of_thought(problem)
213
+ ```
214
+
215
+ ### Multi-Step Reasoning
216
+ ```python
217
+ problem = RawInput(
218
+ text_input="""
219
+ A company has 3 departments with 10, 15, and 20 employees respectively.
220
+ If they want to form a committee with 2 people from each department,
221
+ how many different committees are possible?
222
+ """,
223
+ image_input=None
224
+ )
225
+ answer, logs = solver.solve_with_code(problem)
226
+ ```
227
+
228
+ ## Web Interface
229
+
230
+ The web interface provides:
231
+ - **Problem Input**: Text area with optional image upload
232
+ - **Model Selection**: Choose from available LLM providers
233
+ - **Settings Panel**: Configure solving parameters
234
+ - **Real-time Streaming**: Watch the AI solve problems step-by-step
235
+ - **Chat History**: Review previous solutions
236
+ - **Export Options**: Download chat logs and solutions
237
+
238
+ ## Session Management
239
+
240
+ PIPS includes comprehensive session management capabilities:
241
+
242
+ ### Automatic Session Loading
243
+ - **First Launch**: Automatically loads curated example sessions demonstrating PIPS capabilities
244
+ - **Persistent Storage**: All sessions are saved in browser localStorage for persistence across visits
245
+ - **Smart Cleanup**: Automatically removes incomplete or invalid sessions
246
+
247
+ ### Import/Export Sessions
248
+ - **Bulk Export**: Export all sessions as a JSON file via the "Export" button
249
+ - **Individual Export**: Download single sessions using the download icon next to each session
250
+ - **Import Sessions**: Import previously exported session files via the "Import" button
251
+ - **Duplicate Detection**: Automatically detects and handles duplicate sessions during import
252
+
253
+ ### Session Format
254
+ Sessions are exported in a portable JSON format:
255
+ ```json
256
+ {
257
+ "exportDate": "2024-01-15T10:00:00.000Z",
258
+ "sessions": {
259
+ "session_id": {
260
+ "id": "session_id",
261
+ "title": "Session title",
262
+ "problemText": "Original problem description",
263
+ "image": "base64_image_data_or_null",
264
+ "createdAt": "2024-01-15T09:00:00.000Z",
265
+ "lastUsed": "2024-01-15T09:15:00.000Z",
266
+ "status": "completed|interrupted|solving|active",
267
+ "chatHistory": [
268
+ {
269
+ "sender": "PIPS|AI Assistant|User",
270
+ "content": "Message content",
271
+ "iteration": "Iteration 1",
272
+ "timestamp": "2024-01-15T09:01:00.000Z"
273
+ }
274
+ ]
275
+ }
276
+ }
277
+ }
278
+ ```
279
+
280
+ ### Session States
281
+ - **Active**: New sessions where users can input problems
282
+ - **Solving**: Sessions currently being processed by PIPS
283
+ - **Completed**: Successfully finished sessions (read-only)
284
+ - **Interrupted**: Sessions stopped by user or error (read-only)
285
+
286
+ ## Development
287
+
288
+ ### Setup Development Environment
289
+ ```bash
290
+ git clone <repository-url>
291
+ cd pips
292
+ pip install -e .[dev]
293
+ ```
294
+
295
+ ### Running Tests
296
+ ```bash
297
+ pytest
298
+ pytest --cov=pips # With coverage
299
+ ```
300
+
301
+ ### Code Formatting
302
+ ```bash
303
+ black pips/
304
+ isort pips/
305
+ flake8 pips/
306
+ mypy pips/
307
+ ```
308
+
309
+ ## Contributing
310
+
311
+ 1. Fork the repository
312
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
313
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
314
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
315
+ 5. Open a Pull Request
316
+
317
+ ## License
318
+
319
+ This project is licensed under the MIT License - see the LICENSE file for details.
320
+
321
+ ## Acknowledgments
322
+
323
+ - OpenAI for GPT models
324
+ - Anthropic for Claude models
325
+ - Google for GenAI models
326
+ - Flask and SocketIO communities
digest.txt ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pips-solver"
7
+ version = "1.0.0"
8
+ description = "Python Iterative Problem Solving (PIPS) - A library for iterative code generation and refinement using LLMs"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ authors = [
12
+ {name = "PIPS Development Team", email = "contact@example.com"}
13
+ ]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+ "Topic :: Software Development :: Libraries :: Python Modules",
28
+ ]
29
+ keywords = ["llm", "code-generation", "ai", "problem-solving", "iterative"]
30
+ requires-python = ">=3.8"
31
+ dependencies = [
32
+ "openai>=1.0.0",
33
+ "anthropic>=0.7.0",
34
+ "google-genai>=0.2.0",
35
+ "flask>=2.0.0",
36
+ "flask-socketio>=5.0.0",
37
+ "pillow>=8.0.0",
38
+ "timeout-decorator>=0.5.0",
39
+ "python-socketio[client]>=5.0.0",
40
+ ]
41
+
42
+ [project.optional-dependencies]
43
+ web = [
44
+ "flask>=2.0.0",
45
+ "flask-socketio>=5.0.0",
46
+ "python-socketio[client]>=5.0.0",
47
+ ]
48
+ dev = [
49
+ "pytest>=6.0.0",
50
+ "pytest-cov>=2.0.0",
51
+ "black>=22.0.0",
52
+ "flake8>=4.0.0",
53
+ "mypy>=0.950",
54
+ "isort>=5.0.0",
55
+ ]
56
+ all = [
57
+ "pips-solver[web,dev]"
58
+ ]
59
+
60
+ [project.scripts]
61
+ pips = "pips.__main__:main"
62
+
63
+ [project.urls]
64
+ Homepage = "https://github.com/example/pips"
65
+ Repository = "https://github.com/example/pips"
66
+ Issues = "https://github.com/example/pips/issues"
67
+ Documentation = "https://github.com/example/pips#readme"
68
+
69
+ [tool.hatch.build.targets.wheel]
70
+ packages = ["src/pips"]
71
+
72
+ [tool.hatch.build.targets.sdist]
73
+ include = [
74
+ "/src/pips",
75
+ "/README.md",
76
+ "/LICENSE",
77
+ ]
78
+
79
+ [tool.black]
80
+ line-length = 88
81
+ target-version = ['py38']
82
+
83
+ [tool.isort]
84
+ profile = "black"
85
+ multi_line_output = 3
86
+
87
+ [tool.mypy]
88
+ python_version = "3.8"
89
+ warn_return_any = true
90
+ warn_unused_configs = true
91
+ disallow_untyped_defs = false
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai>=1.0.0
2
+ anthropic>=0.7.0
3
+ google-genai>=0.2.0
4
+ flask>=2.0.0
5
+ flask-socketio>=5.0.0
6
+ pillow>=8.0.0
7
+ timeout-decorator>=0.5.0
8
+ python-socketio[client]>=5.0.0
src/pips/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIPS: Python Iterative Problem Solving
3
+
4
+ A library for iterative code generation and refinement using LLMs.
5
+ """
6
+
7
+ __version__ = "1.0.0"
8
+
9
+ from .core import PIPSSolver, PIPSMode
10
+ from .models import get_model
11
+ from .model_registry import register_model
12
+
13
+ try:
14
+ from .web_app import run_app
15
+ __all__ = ["PIPSSolver", "PIPSMode", "get_model", "register_model", "run_app"]
16
+ except ImportError:
17
+ __all__ = ["PIPSSolver", "PIPSMode", "get_model", "register_model"]
src/pips/__main__.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ PIPS entry-point.
4
+
5
+ Usage:
6
+ python -m pips # starts on 0.0.0.0:8080
7
+ python -m pips --port 5000 # custom port
8
+ python -m pips --host 127.0.0.1 --debug
9
+ """
10
+
11
+ import argparse
12
+ import sys
13
+
14
+ # Import the runner we exposed in the simplified web_app.py
15
+ from .web_app import run_app
16
+
17
+
18
+
19
+
20
+
21
+ def main() -> None:
22
+ parser = argparse.ArgumentParser(
23
+ prog="pips",
24
+ description="PIPS – Python Iterative Problem Solving web interface",
25
+ )
26
+
27
+ parser.add_argument(
28
+ "-p", "--port",
29
+ type=int,
30
+ default=8080,
31
+ help="HTTP port to listen on (default 8080)",
32
+ )
33
+ parser.add_argument(
34
+ "--host",
35
+ type=str,
36
+ default="0.0.0.0",
37
+ help="Bind address (default 0.0.0.0)",
38
+ )
39
+ parser.add_argument(
40
+ "--debug",
41
+ action="store_true",
42
+ help="Enable Flask/SockeIO debug mode",
43
+ )
44
+
45
+ args = parser.parse_args()
46
+
47
+ print(f"▶️ PIPS web UI: http://{args.host}:{args.port} (debug={args.debug})")
48
+
49
+ try:
50
+ run_app(host=args.host, port=args.port, debug=args.debug)
51
+ except KeyboardInterrupt:
52
+ print("\n👋 Shutting down PIPS—good-bye!")
53
+ sys.exit(0)
54
+ except Exception as exc: # pragma: no cover
55
+ print(f"❌ Fatal error starting PIPS: {exc}")
56
+ sys.exit(1)
57
+
58
+
59
+ if __name__ == "__main__":
60
+ main()
src/pips/core.py ADDED
@@ -0,0 +1,991 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import re, json
3
+ from enum import Enum
4
+ from typing import Any, Dict, List, Tuple, Optional, Callable
5
+ from .utils import RawInput, img2base64, python_eval
6
+ from .models import LLMModel, SamplingParams
7
+
8
+
9
+ # ---------------------------------------------------------------------
10
+ # PIPSMode enum for agent vs interactive modes
11
+ # ---------------------------------------------------------------------
12
+ class PIPSMode(Enum):
13
+ AGENT = "AGENT"
14
+ INTERACTIVE = "INTERACTIVE"
15
+
16
+
17
+ # ---------------------------------------------------------------------
18
+ # Helper-type aliases
19
+ TokenCb = Callable[[str, int, str], None]
20
+ CbMap = Dict[str, Callable[..., Any]]
21
+ # ---------------------------------------------------------------------
22
+
23
+
24
+ class PIPSSolver:
25
+ """Python Iterative Problem Solving (PIPS) solver — unified streaming & non-streaming."""
26
+
27
+ def __init__(
28
+ self,
29
+ model: LLMModel,
30
+ *,
31
+ max_iterations: int = 8,
32
+ temperature: float = 0.0,
33
+ max_tokens: int = 4096,
34
+ top_p: float = 1.0,
35
+ interactive: bool = False,
36
+ critic_model: Optional[LLMModel] = None,
37
+ ):
38
+ """
39
+ Args:
40
+ model: An object that implements .chat(...) and, optionally, .stream_chat(...).
41
+ max_iterations: Maximum refinement loops for the code-generation mode.
42
+ temperature: Sampling temperature passed to the LLM.
43
+ max_tokens: Max tokens for each LLM response.
44
+ top_p: Nucleus-sampling parameter.
45
+ interactive: Whether to use interactive mode (wait for user feedback).
46
+ critic_model: Optional separate model for criticism (defaults to main model).
47
+ """
48
+ self.model = model
49
+ self.critic_model = critic_model or model
50
+ self.max_iterations = max_iterations
51
+ self.temperature = temperature
52
+ self.max_tokens = max_tokens
53
+ self.top_p = top_p
54
+ self.interactive = interactive
55
+ self._mode_decision_summary: Optional[Dict[str, Any]] = None
56
+
57
+ # Interactive mode state
58
+ self._checkpoint = None
59
+ self._current_conversation = None
60
+
61
+ # System prompt identical to the original implementation
62
+ self.system_prompt = """You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.
63
+
64
+ You MUST always plan extensively before outputting any symbols or code.
65
+
66
+ You MUST iterate and keep going until the problem is solved.
67
+
68
+ # Workflow
69
+
70
+ ## Problem Solving Steps
71
+ 1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.
72
+ 2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.
73
+ 3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.
74
+ The JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == "__main__"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.
75
+ The Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.
76
+ Be careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.
77
+ Be sure to always output a JSON code block and a Python code block.
78
+ Make sure to follow these formatting requirements exactly.
79
+ """
80
+
81
+
82
+ # ========= INTERNAL HELPERS =====================================
83
+ _MODE_SELECTION_LIST_RE = re.compile(r"\[([0-9eE+.\s,-]+)\]")
84
+
85
+ def _parse_probability_scores(self, raw: str) -> Optional[List[float]]:
86
+ """Extract a list of 10 probability scores from raw LLM output."""
87
+ if not raw:
88
+ return None
89
+
90
+ candidates: List[Any] = []
91
+
92
+ try:
93
+ parsed = ast.literal_eval(raw.strip())
94
+ candidates.append(parsed)
95
+ except Exception:
96
+ pass
97
+
98
+ for match in self._MODE_SELECTION_LIST_RE.finditer(raw):
99
+ candidate_str = f"[{match.group(1)}]"
100
+ try:
101
+ candidates.append(ast.literal_eval(candidate_str))
102
+ except Exception:
103
+ continue
104
+
105
+ for candidate in candidates:
106
+ if (
107
+ isinstance(candidate, list)
108
+ and len(candidate) == 10
109
+ and all(isinstance(x, (int, float)) for x in candidate)
110
+ ):
111
+ floats = [float(x) for x in candidate]
112
+ if all(0.0 <= x <= 1.0 for x in floats):
113
+ return floats
114
+ return None
115
+
116
+ def _build_mode_selection_prompt(self, sample: RawInput) -> List[dict[str, Any]]:
117
+ """Create the conversation for deciding between code and chain-of-thought."""
118
+ from .prompts import CHOOSE_CONSERVATIVE_COT_VS_CODE_PROMPT
119
+
120
+ instructions = CHOOSE_CONSERVATIVE_COT_VS_CODE_PROMPT.strip()
121
+ extra_instruction = (
122
+ "\nAt the end of your response, output only the list of 10 probabilities inside square brackets "
123
+ "after the text 'FINAL ANSWER:'."
124
+ )
125
+
126
+ content: List[dict[str, Any]] = [{"type": "text", "text": f"{instructions}{extra_instruction}"}]
127
+
128
+ if sample.image_input is not None:
129
+ content.append(
130
+ {
131
+ "type": "image_url",
132
+ "image_url": {
133
+ "url": f"data:image/jpeg;base64,{img2base64(sample.image_input)}",
134
+ "detail": "high",
135
+ },
136
+ }
137
+ )
138
+ if sample.text_input is not None:
139
+ content.append({"type": "text", "text": f"TARGET QUESTION:\n{sample.text_input}"})
140
+
141
+ return [{"role": "user", "content": content}]
142
+
143
+ def _summarise_messages_for_log(self, messages: List[dict[str, Any]]) -> List[dict[str, Any]]:
144
+ """Return a copy of the conversation with image payloads redacted for logging."""
145
+ summary: List[dict[str, Any]] = []
146
+ for message in messages:
147
+ content = message.get("content")
148
+ if isinstance(content, list):
149
+ new_content: List[dict[str, Any]] = []
150
+ for item in content:
151
+ if isinstance(item, dict) and item.get("type") == "image_url":
152
+ new_content.append({"type": "text", "text": "[image content omitted]"})
153
+ else:
154
+ new_content.append(item)
155
+ summary.append({**message, "content": new_content})
156
+ else:
157
+ summary.append(dict(message))
158
+ return summary
159
+
160
+ def _decide_solving_mode(
161
+ self,
162
+ messages: List[dict[str, Any]],
163
+ *,
164
+ max_tokens: int,
165
+ ) -> Dict[str, Any]:
166
+ """Run the self-reflection prompt to choose between code and chain-of-thought."""
167
+ sampling_params = SamplingParams(temperature=0.0, max_tokens=max_tokens, top_p=1.0)
168
+
169
+ try:
170
+ response = self.model.chat(messages, sampling_params=sampling_params, use_tqdm=False)
171
+ except Exception as exc:
172
+ print(f"[DEBUG] Mode selection prompt raised exception: {exc}. Falling back to chain-of-thought.")
173
+ return {
174
+ "use_code": False,
175
+ "scores": None,
176
+ "average": None,
177
+ "raw_response": "",
178
+ "error": str(exc),
179
+ }
180
+
181
+ raw_text = ""
182
+ if response and getattr(response[0], "outputs", None):
183
+ raw_text = response[0].outputs[0].text or ""
184
+
185
+ scores = self._parse_probability_scores(raw_text)
186
+ if scores is None:
187
+ print("[DEBUG] Mode selection prompt failed to yield valid probability list; defaulting to chain-of-thought.")
188
+ return {
189
+ "use_code": False,
190
+ "scores": None,
191
+ "average": None,
192
+ "raw_response": raw_text,
193
+ "error": None,
194
+ }
195
+
196
+ average = sum(scores) / len(scores)
197
+ use_code = average > 0.5
198
+
199
+ return {
200
+ "use_code": use_code,
201
+ "scores": scores,
202
+ "average": average,
203
+ "raw_response": raw_text,
204
+ "error": None,
205
+ }
206
+
207
+ def _chat(
208
+ self,
209
+ conversation: List[Dict[str, Any]],
210
+ sampling_params: SamplingParams,
211
+ stream: bool,
212
+ iteration: int,
213
+ callbacks: Optional[CbMap] = None,
214
+ ) -> str:
215
+ """
216
+ Wrapper around model.chat / model.stream_chat that:
217
+ • chooses the right API based on `stream`
218
+ • fires streaming callbacks if supplied
219
+ • returns the full assistant text
220
+ """
221
+ callbacks = callbacks or {}
222
+
223
+ # Dummy lambdas so we can call without branch checks later
224
+ on_start = callbacks.get("on_llm_streaming_start", lambda *a, **k: None)
225
+ on_token = callbacks.get("on_llm_streaming_token", lambda *a, **k: None)
226
+ on_end = callbacks.get("on_llm_streaming_end", lambda *a, **k: None)
227
+ interrupted = callbacks.get("check_interrupted", lambda: False)
228
+
229
+ model_name = self.model.__class__.__name__
230
+
231
+ if not stream:
232
+ # plain synchronous call
233
+ resp = self.model.chat(conversation, sampling_params=sampling_params, use_tqdm=False)
234
+ return resp[0].outputs[0].text
235
+
236
+ # ---- streaming path ----
237
+ on_start(iteration, model_name)
238
+
239
+ def _emit(tok: str):
240
+ if not interrupted():
241
+ on_token(tok, iteration, model_name)
242
+
243
+ if hasattr(self.model, "stream_chat"):
244
+ resp = self.model.stream_chat(
245
+ conversation,
246
+ sampling_params=sampling_params,
247
+ emit_callback=_emit,
248
+ interrupted_callback=interrupted,
249
+ )
250
+ else: # fallback
251
+ resp = self.model.chat(conversation, sampling_params=sampling_params, use_tqdm=False)
252
+
253
+ on_end(iteration, model_name)
254
+ return resp[0].outputs[0].text
255
+
256
+ # ---------------------------------------------------------------
257
+
258
+ def solve(
259
+ self,
260
+ sample: RawInput,
261
+ *,
262
+ stream: bool = False,
263
+ callbacks: Optional[CbMap] = None,
264
+ additional_rules: str = "",
265
+ decision_max_tokens: int = 1024,
266
+ interactive_requested: bool = False,
267
+ ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
268
+ """Automatically choose between chain-of-thought and code-based solving."""
269
+ callbacks = callbacks or {}
270
+ step = callbacks.get("on_step_update", lambda *a, **k: None)
271
+
272
+ decision_messages = self._build_mode_selection_prompt(sample)
273
+ decision_prompt_details = {
274
+ "description": "Choosing between chain-of-thought and iterative coding",
275
+ "conversation": self._summarise_messages_for_log(decision_messages),
276
+ }
277
+
278
+ step(
279
+ "mode_selection",
280
+ "Choosing between chain-of-thought reasoning and iterative coding…",
281
+ prompt_details=decision_prompt_details,
282
+ )
283
+
284
+ decision = self._decide_solving_mode(decision_messages, max_tokens=decision_max_tokens)
285
+ use_code = decision.get("use_code", False)
286
+ average = decision.get("average")
287
+ scores = decision.get("scores")
288
+ decision_error = decision.get("error")
289
+
290
+ if scores is None:
291
+ decision_message = "Could not parse confidence scores; defaulting to chain-of-thought reasoning."
292
+ else:
293
+ decision_message = (
294
+ f"Average code suitability score: {average:.2f}. "
295
+ f"Proceeding with {'iterative code generation' if use_code else 'chain-of-thought reasoning'}."
296
+ )
297
+
298
+ step(
299
+ "mode_selection",
300
+ decision_message,
301
+ prompt_details={**decision_prompt_details, "raw_response": decision.get("raw_response", ""), "error": decision_error},
302
+ )
303
+
304
+ if interactive_requested and not use_code:
305
+ step(
306
+ "mode_selection",
307
+ "Interactive mode requested, but chain-of-thought was selected; running without interactive checkpoints.",
308
+ prompt_details=None,
309
+ )
310
+
311
+ mode_decision_summary = {
312
+ "use_code": use_code,
313
+ "scores": scores,
314
+ "average_score": average,
315
+ "raw_response": decision.get("raw_response", ""),
316
+ "prompt": decision_prompt_details["conversation"],
317
+ "error": decision_error,
318
+ }
319
+ self._mode_decision_summary = mode_decision_summary
320
+
321
+ original_interactive = self.interactive
322
+ if not use_code:
323
+ self.interactive = False
324
+
325
+ try:
326
+ if use_code:
327
+ answer, logs = self.solve_with_code(
328
+ sample,
329
+ stream=stream,
330
+ callbacks=callbacks,
331
+ additional_rules=additional_rules,
332
+ )
333
+ else:
334
+ answer, logs = self.solve_chain_of_thought(
335
+ sample,
336
+ stream=stream,
337
+ callbacks=callbacks,
338
+ additional_rules=additional_rules,
339
+ )
340
+ finally:
341
+ self.interactive = original_interactive
342
+
343
+ if isinstance(logs, dict):
344
+ logs.setdefault("mode_decision", mode_decision_summary)
345
+
346
+ return answer, logs, mode_decision_summary
347
+
348
+ def _extract_components(self, output: str) -> Tuple[Any, str, str]:
349
+ """(unchanged helper) extract JSON, code, and reasoning."""
350
+ json_obj, code_str, reasoning = "", "", ""
351
+ try:
352
+ if m := re.findall(r"```json(.*?)```", output, re.DOTALL):
353
+ json_obj = json.loads(m[-1])
354
+ except Exception:
355
+ pass
356
+ try:
357
+ j_end = output.index("```", output.index("```json") + 7) + 3
358
+ p_start = output.index("```python", j_end)
359
+ reasoning = output[j_end:p_start].strip()
360
+ except Exception:
361
+ pass
362
+ try:
363
+ if m := re.findall(r"```python(.*?)```", output, re.DOTALL):
364
+ code_str = m[-1]
365
+ except Exception:
366
+ pass
367
+ return json_obj, code_str, reasoning
368
+
369
+ # ========= PUBLIC SOLVERS ======================================
370
+
371
+ def solve_chain_of_thought(
372
+ self,
373
+ sample: RawInput,
374
+ *,
375
+ stream: bool = False,
376
+ callbacks: Optional[CbMap] = None,
377
+ additional_rules: str = "",
378
+ ) -> Tuple[str, Dict[str, Any]]:
379
+ """
380
+ One implementation covers both streaming & non-streaming.
381
+ If `stream=True`, supply the standard streaming callbacks.
382
+ """
383
+ callbacks = callbacks or {}
384
+ step = callbacks.get("on_step_update", lambda *a, **k: None)
385
+ logs: Dict[str, Any] = {}
386
+
387
+ # Build prompt with additional rules if provided
388
+ system_content = ""
389
+ if additional_rules.strip():
390
+ system_content = f"Additional Requirements:\n{additional_rules.strip()}\n\nMake sure to follow these additional requirements when answering."
391
+ print(f"[DEBUG] Added custom rules to chain of thought prompt: {repr(additional_rules)}")
392
+
393
+ if sample.image_input is not None:
394
+ img_b64 = img2base64(sample.image_input)
395
+ user_content = [
396
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
397
+ {"type": "text", "text": f"Question: {sample.text_input}"},
398
+ {"type": "text", "text": "Answer step-by-step and finish with 'FINAL ANSWER:'"},
399
+ ]
400
+ else:
401
+ user_content = f"Question: {sample.text_input}\nAnswer step-by-step and finish with 'FINAL ANSWER:'."
402
+
403
+ prompt = []
404
+ if system_content:
405
+ prompt.append({"role": "system", "content": system_content})
406
+ prompt.append({"role": "user", "content": user_content})
407
+ params = SamplingParams(self.temperature, self.max_tokens, self.top_p)
408
+
409
+ # Create prompt details for chain of thought
410
+ cot_prompt_details = {
411
+ "description": "Chain of thought reasoning",
412
+ "conversation": prompt
413
+ }
414
+
415
+ step("reasoning", "Thinking step-by-step...", prompt_details=cot_prompt_details)
416
+
417
+ # Call LLM through unified wrapper
418
+ output = self._chat(prompt, params, stream, iteration=0, callbacks=callbacks)
419
+ logs["output"] = output
420
+
421
+ # Parse FINAL ANSWER (same logic)
422
+ ans = ""
423
+ try:
424
+ ans = re.findall(r"FINAL ANSWER:(.*)", output, re.DOTALL)[-1].strip()
425
+ except Exception:
426
+ pass
427
+
428
+ # Check if we were interrupted during processing
429
+ interrupted = callbacks.get("check_interrupted", lambda: False)
430
+ if interrupted():
431
+ step("interrupted", "PIPS was interrupted by the user.", prompt_details=None)
432
+ else:
433
+ step("finished", "Chain of thought completed!", prompt_details=None)
434
+
435
+ final = f"FINAL ANSWER: {ans}" if ans else output
436
+ logs["final_answer"] = ans
437
+ return final, logs
438
+
439
+ # ---------------------------------------------------------------
440
+
441
+ def solve_with_code(
442
+ self,
443
+ sample: RawInput,
444
+ *,
445
+ stream: bool = False,
446
+ callbacks: Optional[CbMap] = None,
447
+ additional_rules: str = "",
448
+ ) -> Tuple[str, Dict[str, Any]]:
449
+ """
450
+ Iterative code-generation solver (streaming or not).
451
+ `callbacks` is optional; provide it only when you care about
452
+ fine-grained streaming events.
453
+ Args:
454
+ sample: The raw input containing text and/or image.
455
+ stream: Whether to stream tokens from the underlying LLM.
456
+ callbacks: Optional callback map for streaming & execution events.
457
+ additional_rules: Extra natural-language rules that will be forwarded to the internal code critic for more specialized checking.
458
+ """
459
+ callbacks = callbacks or {}
460
+ interrupted = callbacks.get("check_interrupted", lambda: False)
461
+ step = callbacks.get("on_step_update", lambda *a, **k: None)
462
+
463
+ logs = {"all_outputs": [], "all_symbols": [], "all_programs": [], "all_reasoning": []}
464
+
465
+ # Abort early?
466
+ if interrupted():
467
+ return "", logs
468
+
469
+ # ---- Build initial prompt with custom rules ----
470
+ # Create system prompt with additional rules if provided
471
+ system_content = self.system_prompt
472
+ if additional_rules.strip():
473
+ system_content += f"\n\nAdditional Requirements: \n{additional_rules.strip()}\n\n Make sure to follow these additional requirements when generating your solution."
474
+ print(f"[DEBUG] Added custom rules to initial code generation prompt: {repr(additional_rules)}")
475
+
476
+ if sample.image_input is not None:
477
+ img_b64 = img2base64(sample.image_input)
478
+ content = [
479
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
480
+ {"type": "text", "text": sample.text_input},
481
+ ]
482
+ else:
483
+ content = sample.text_input
484
+
485
+ conv = [
486
+ {"role": "system", "content": system_content},
487
+ {"role": "user", "content": content},
488
+ ]
489
+ params = SamplingParams(self.temperature, self.max_tokens, self.top_p)
490
+
491
+ # Create prompt details for initial generation
492
+ initial_prompt_details = {
493
+ "description": "Initial solution generation",
494
+ "conversation": conv
495
+ }
496
+
497
+ step("initial_generation", "Generating first solution…", prompt_details=initial_prompt_details)
498
+ raw = self._chat(conv, params, stream, iteration=0, callbacks=callbacks)
499
+ logs["all_outputs"].append(raw)
500
+ conv.append({"role": "assistant", "content": raw})
501
+
502
+ # Extract JSON / code / reasoning
503
+ current_symbols, current_code, reasoning = self._extract_components(raw)
504
+ logs["all_symbols"].append(current_symbols)
505
+ logs["all_programs"].append(current_code)
506
+ if reasoning:
507
+ logs["all_reasoning"].append(reasoning)
508
+
509
+ # -------- execute & refine up to max_iterations --------
510
+ exec_out, stdout, err = self._run_code(current_symbols, current_code, 0, callbacks, logs)
511
+ for i in range(1, self.max_iterations + 1):
512
+ if interrupted():
513
+ break
514
+
515
+ # --- evaluate code quality with prompt details ---
516
+ feedback = self._critic(
517
+ question=sample.text_input,
518
+ code=current_code,
519
+ symbols=current_symbols,
520
+ out=exec_out,
521
+ stdout=stdout,
522
+ err=err,
523
+ params=params,
524
+ additional_rules=additional_rules,
525
+ stream=stream,
526
+ iteration=i,
527
+ callbacks=callbacks,
528
+ )
529
+ # Note: feedback is now displayed via streaming, no need for legacy callback
530
+
531
+ # Interactive mode: wait for user feedback if enabled
532
+ if self.interactive:
533
+ print(f"[DEBUG] Interactive mode triggered at iteration {i}")
534
+ # Emit waiting for user feedback event
535
+ on_waiting_for_user = callbacks.get("on_waiting_for_user", lambda *a, **k: None)
536
+ on_waiting_for_user(i, feedback, current_code, current_symbols)
537
+ print(f"[DEBUG] Emitted awaiting_user_feedback event")
538
+
539
+ # Store checkpoint for later continuation
540
+ self._checkpoint = {
541
+ "sample": sample,
542
+ "logs": logs,
543
+ "conv": conv,
544
+ "symbols": current_symbols,
545
+ "code": current_code,
546
+ "exec_out": exec_out,
547
+ "stdout": stdout,
548
+ "err": err,
549
+ "feedback": feedback,
550
+ "iteration": i,
551
+ "params": params,
552
+ "additional_rules": additional_rules,
553
+ "stream": stream,
554
+ "callbacks": callbacks
555
+ }
556
+
557
+ # Return control to web_app - it will call continue_from_checkpoint
558
+ return "", logs
559
+
560
+ # ask model to improve
561
+ fix_prompt = self._fix_prompt(sample.text_input, current_code, current_symbols, exec_out, stdout, err, feedback)
562
+ conv.append({"role": "user", "content": fix_prompt})
563
+
564
+ # Create prompt details for refinement
565
+ refinement_prompt_details = {
566
+ "description": f"Solution refinement (iteration {i})",
567
+ "conversation": conv
568
+ }
569
+
570
+ step("refinement", f"Refining solution (iteration {i})...", iteration=i, prompt_details=refinement_prompt_details)
571
+ raw = self._chat(conv, params, stream, iteration=i, callbacks=callbacks)
572
+ logs["all_outputs"].append(raw)
573
+ conv.append({"role": "assistant", "content": raw})
574
+
575
+ if "FINISHED" in raw:
576
+ break
577
+
578
+ # update code / symbols
579
+ new_symbols, new_code, reasoning = self._extract_components(raw)
580
+ if new_symbols:
581
+ current_symbols = new_symbols
582
+ logs["all_symbols"].append(new_symbols)
583
+ if new_code:
584
+ current_code = new_code
585
+ logs["all_programs"].append(new_code)
586
+ if reasoning:
587
+ logs["all_reasoning"].append(reasoning)
588
+
589
+ exec_out, stdout, err = self._run_code(current_symbols, current_code, i, callbacks, logs)
590
+
591
+ # Check if we were interrupted during processing
592
+ if interrupted():
593
+ step("interrupted", "PIPS was interrupted by the user.", prompt_details=None)
594
+ else:
595
+ step("finished", "Solution completed successfully!", prompt_details=None)
596
+
597
+ final = f"FINAL ANSWER: {exec_out}"
598
+ return final, logs
599
+
600
+ # ========= INTERACTIVE MODE HELPERS ============================
601
+
602
+ def continue_from_checkpoint(self, user_feedback: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
603
+ """
604
+ Continue solving from a saved checkpoint with user feedback.
605
+
606
+ Args:
607
+ user_feedback: Dictionary containing user feedback with keys:
608
+ - accept_critic: bool - whether to accept critic's feedback
609
+ - extra_comments: str - additional user comments
610
+ - quoted_ranges: list - specific code snippets user highlighted
611
+ - terminate: bool - whether user wants to terminate
612
+
613
+ Returns:
614
+ Final answer and logs
615
+ """
616
+ if not self._checkpoint:
617
+ raise ValueError("No checkpoint available - cannot continue interactive mode")
618
+
619
+ checkpoint = self._checkpoint
620
+ user_feedback = user_feedback or {}
621
+
622
+ # Check if user wants to terminate
623
+ if user_feedback.get("terminate", False):
624
+ final = f"FINAL ANSWER: {checkpoint['exec_out']}"
625
+ return final, checkpoint["logs"]
626
+
627
+ # Merge critic feedback with user feedback
628
+ merged_feedback = self.merge_user_feedback(
629
+ checkpoint["feedback"],
630
+ user_feedback.get("accept_critic", True),
631
+ user_feedback.get("quoted_ranges", [])
632
+ )
633
+
634
+ # Check if user provided any feedback
635
+ has_user_feedback = bool(user_feedback.get("quoted_ranges", []))
636
+
637
+ # Continue the solving process
638
+ current_symbols = checkpoint["symbols"]
639
+ current_code = checkpoint["code"]
640
+ exec_out = checkpoint["exec_out"]
641
+ stdout = checkpoint["stdout"]
642
+ err = checkpoint["err"]
643
+
644
+ fix_prompt = self._fix_prompt(
645
+ checkpoint["sample"].text_input,
646
+ current_code,
647
+ current_symbols,
648
+ exec_out,
649
+ stdout,
650
+ err,
651
+ merged_feedback,
652
+ has_user_feedback
653
+ )
654
+
655
+ checkpoint["conv"].append({"role": "user", "content": fix_prompt})
656
+
657
+ # Create prompt details for refinement
658
+ refinement_prompt_details = {
659
+ "description": f"Solution refinement (iteration {checkpoint['iteration']})",
660
+ "conversation": checkpoint["conv"]
661
+ }
662
+
663
+ step = checkpoint["callbacks"].get("on_step_update", lambda *a, **k: None)
664
+ step("refinement", f"Refining solution (iteration {checkpoint['iteration']})...",
665
+ iteration=checkpoint['iteration'], prompt_details=refinement_prompt_details)
666
+
667
+ raw = self._chat(checkpoint["conv"], checkpoint["params"], checkpoint["stream"],
668
+ iteration=checkpoint['iteration'], callbacks=checkpoint["callbacks"])
669
+
670
+ checkpoint["logs"]["all_outputs"].append(raw)
671
+ checkpoint["conv"].append({"role": "assistant", "content": raw})
672
+
673
+ if "FINISHED" in raw:
674
+ final = f"FINAL ANSWER: {checkpoint['exec_out']}"
675
+ return final, checkpoint["logs"]
676
+
677
+ # Update code/symbols and continue
678
+ new_symbols, new_code, reasoning = self._extract_components(raw)
679
+ if new_symbols:
680
+ current_symbols = new_symbols
681
+ checkpoint["logs"]["all_symbols"].append(new_symbols)
682
+ if new_code:
683
+ current_code = new_code
684
+ checkpoint["logs"]["all_programs"].append(new_code)
685
+ if reasoning:
686
+ checkpoint["logs"]["all_reasoning"].append(reasoning)
687
+
688
+ exec_out, stdout, err = self._run_code(current_symbols, current_code, checkpoint['iteration'],
689
+ checkpoint["callbacks"], checkpoint["logs"])
690
+ checkpoint["symbols"] = current_symbols
691
+ checkpoint["code"] = current_code
692
+ checkpoint["exec_out"] = exec_out
693
+ checkpoint["stdout"] = stdout
694
+ checkpoint["err"] = err
695
+
696
+ # Temporarily disable interactive mode and continue with remaining iterations
697
+ original_interactive = self.interactive
698
+ self.interactive = False
699
+
700
+ # Continue solving from next iteration
701
+ remaining_iterations = self.max_iterations - checkpoint['iteration']
702
+ if remaining_iterations > 0:
703
+ # Create a new sample with current state
704
+ sample = checkpoint["sample"]
705
+
706
+ # Continue refinement loop
707
+ for i in range(checkpoint['iteration'] + 1, self.max_iterations + 1):
708
+ interrupted = checkpoint["callbacks"].get("check_interrupted", lambda: False)
709
+ if interrupted():
710
+ break
711
+
712
+ feedback = self._critic(
713
+ question=sample.text_input,
714
+ code=current_code,
715
+ symbols=current_symbols,
716
+ out=exec_out,
717
+ stdout=stdout,
718
+ err=err,
719
+ params=checkpoint["params"],
720
+ additional_rules=checkpoint["additional_rules"],
721
+ stream=checkpoint["stream"],
722
+ iteration=i,
723
+ callbacks=checkpoint["callbacks"],
724
+ )
725
+
726
+ fix_prompt = self._fix_prompt(sample.text_input, current_code, current_symbols, exec_out, stdout, err, feedback)
727
+ checkpoint["conv"].append({"role": "user", "content": fix_prompt})
728
+
729
+ refinement_prompt_details = {
730
+ "description": f"Solution refinement (iteration {i})",
731
+ "conversation": checkpoint["conv"]
732
+ }
733
+
734
+ step("refinement", f"Refining solution (iteration {i})...",
735
+ iteration=i, prompt_details=refinement_prompt_details)
736
+
737
+ raw = self._chat(checkpoint["conv"], checkpoint["params"], checkpoint["stream"],
738
+ iteration=i, callbacks=checkpoint["callbacks"])
739
+
740
+ checkpoint["logs"]["all_outputs"].append(raw)
741
+ checkpoint["conv"].append({"role": "assistant", "content": raw})
742
+
743
+ if "FINISHED" in raw:
744
+ break
745
+
746
+ new_symbols, new_code, reasoning = self._extract_components(raw)
747
+ if new_symbols:
748
+ current_symbols = new_symbols
749
+ checkpoint["logs"]["all_symbols"].append(new_symbols)
750
+ if new_code:
751
+ current_code = new_code
752
+ checkpoint["logs"]["all_programs"].append(new_code)
753
+ if reasoning:
754
+ checkpoint["logs"]["all_reasoning"].append(reasoning)
755
+
756
+ exec_out, stdout, err = self._run_code(current_symbols, current_code, i, checkpoint["callbacks"], checkpoint["logs"])
757
+ checkpoint["symbols"] = current_symbols
758
+ checkpoint["code"] = current_code
759
+ checkpoint["exec_out"] = exec_out
760
+ checkpoint["stdout"] = stdout
761
+ checkpoint["err"] = err
762
+
763
+ # Restore interactive mode
764
+ self.interactive = original_interactive
765
+
766
+ # Clear checkpoint
767
+ self._checkpoint = None
768
+
769
+ final = f"FINAL ANSWER: {exec_out}"
770
+ return final, checkpoint["logs"]
771
+
772
+ def merge_user_feedback(self, critic_feedback: str, accept_critic: bool,
773
+ quoted_ranges: List[Dict]) -> str:
774
+ """
775
+ Merge critic feedback with user feedback.
776
+
777
+ Args:
778
+ critic_feedback: Original feedback from the critic
779
+ accept_critic: Whether to include critic's feedback
780
+ quoted_ranges: List of user feedback items (general comments, code feedback, symbol feedback)
781
+
782
+ Returns:
783
+ Merged feedback string
784
+ """
785
+ feedback_parts = []
786
+
787
+ if accept_critic and critic_feedback:
788
+ feedback_parts.append("AI Critic's feedback:")
789
+ feedback_parts.append(critic_feedback)
790
+
791
+ if quoted_ranges:
792
+ # Separate general comments from specific code/symbol feedback
793
+ general_comments = []
794
+ specific_feedback = []
795
+
796
+ for item in quoted_ranges:
797
+ if not item.get("comment"):
798
+ continue
799
+
800
+ if item.get("type") == "general" or not item.get("text"):
801
+ general_comments.append(item["comment"])
802
+ else:
803
+ specific_feedback.append(item)
804
+
805
+ # Add general user comments
806
+ if general_comments:
807
+ feedback_parts.append("User feedback:")
808
+ feedback_parts.extend(general_comments)
809
+
810
+ # Add specific code/symbol feedback
811
+ if specific_feedback:
812
+ feedback_parts.append("Specific code feedback:")
813
+ for item in specific_feedback:
814
+ feedback_parts.append(f"Regarding: {item['text']}")
815
+ feedback_parts.append(f"Comment: {item['comment']}")
816
+
817
+ return "\n\n".join(feedback_parts) if feedback_parts else "No specific issues identified."
818
+
819
+ # ========= SMALL UTILITY HELPERS (private) =====================
820
+
821
+ def _run_code(
822
+ self,
823
+ symbols: Any,
824
+ code: str,
825
+ iteration: int,
826
+ callbacks: CbMap,
827
+ logs: Dict[str, Any],
828
+ ) -> Tuple[str, str, str]:
829
+ """Execute candidate code, emit callbacks, store logs, return (out, stdout, err)."""
830
+ on_exec_start = callbacks.get("on_code_execution_start", lambda *a, **k: None)
831
+ on_exec_end = callbacks.get("on_code_execution_end", lambda *a, **k: None)
832
+ on_exec = callbacks.get("on_code_execution", lambda *a, **k: None)
833
+ max_time = callbacks.get("get_max_execution_time", lambda: 10)()
834
+
835
+ on_exec_start(iteration)
836
+ try:
837
+ out, std, err = python_eval(
838
+ f"{code}\nsymbols = {str(symbols)}\nanswer = solve(symbols)",
839
+ max_execution_time=max_time,
840
+ )
841
+ except Exception as e:
842
+ out, std, err = "None", "", str(e)
843
+
844
+ on_exec_end(iteration)
845
+ on_exec(iteration, str(out), std, err)
846
+ logs.setdefault("execution_results", []).append({"output": out, "stdout": std, "error": err})
847
+ return str(out), std, err
848
+
849
+ # ---------------------------------------------------------------
850
+
851
+ def _critic(
852
+ self,
853
+ question: str,
854
+ code: str,
855
+ symbols: Any,
856
+ out: str,
857
+ stdout: str,
858
+ err: str,
859
+ params: SamplingParams,
860
+ additional_rules: str = "",
861
+ stream: bool = False,
862
+ iteration: int = 1,
863
+ callbacks: Optional[CbMap] = None,
864
+ ) -> str:
865
+ """Ask the model to critique the code once per iteration."""
866
+ system_content = f"""You will be given a question and a code solution and you must judge the quality of the code for solving the problem.
867
+
868
+ Look for any of the following issues in the code:
869
+ - The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).
870
+ - The code should not return None unless "None" is the correct answer.
871
+ - The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.
872
+ - There should not be any example usage of the code.
873
+ - If there is a simpler way to solve the problem, please describe it.
874
+ - If there are any clear bugs in the code which impact the correctness of the answer, please describe them.
875
+ - If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.
876
+ - If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.
877
+
878
+ {"Additional issues and specifications to looks for: " if additional_rules else ""}
879
+ {additional_rules}
880
+
881
+ After analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."""
882
+
883
+ user_content = f"""Question: {question}
884
+
885
+ The following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.
886
+ ```json
887
+ {json.dumps(symbols, indent=2)}
888
+ ```
889
+
890
+ ```python
891
+ {code}
892
+ ```
893
+
894
+ Code execution result:
895
+ ```
896
+ Return value: {out}
897
+ Standard output: {stdout}
898
+ Exceptions: {err}
899
+ ```
900
+
901
+ Output a concrete and concise summary of only the issues that are present, do not include any code examples.
902
+ """
903
+
904
+ prompt = [
905
+ {"role": "system", "content": system_content},
906
+ {"role": "user", "content": user_content},
907
+ ]
908
+
909
+ # Create prompt details for the critic
910
+ critic_prompt_details = {
911
+ "description": f"Code quality analysis and critique (iteration {iteration})",
912
+ "conversation": prompt
913
+ }
914
+
915
+ # Emit step update with critic prompt details
916
+ callbacks = callbacks or {}
917
+ step = callbacks.get("on_step_update", lambda *a, **k: None)
918
+ step("code_checking", f"Running code critic (iteration {iteration})...", iteration=iteration, prompt_details=critic_prompt_details)
919
+
920
+ if not stream:
921
+ # Non-streaming path (backward compatibility)
922
+ return self.critic_model.chat(prompt, sampling_params=params, use_tqdm=False)[0].outputs[0].text
923
+
924
+ # Streaming path for code reviewer
925
+
926
+ # Create specialized callbacks for code reviewer streaming
927
+ def _make_reviewer_callbacks():
928
+ on_start = callbacks.get("on_code_check_streaming_start", lambda *a, **k: None)
929
+ on_token = callbacks.get("on_code_check_streaming_token", lambda *a, **k: None)
930
+ on_end = callbacks.get("on_code_check_streaming_end", lambda *a, **k: None)
931
+ interrupted = callbacks.get("check_interrupted", lambda: False)
932
+
933
+ def _emit(tok: str):
934
+ if not interrupted():
935
+ on_token(tok, iteration, "AI Code Reviewer")
936
+
937
+ return on_start, on_token, on_end, _emit
938
+
939
+ on_start, on_token, on_end, _emit = _make_reviewer_callbacks()
940
+
941
+ # Start streaming
942
+ model_name = "AI Code Reviewer"
943
+ on_start(iteration, model_name)
944
+
945
+ # Call streaming method
946
+ if hasattr(self.critic_model, "stream_chat"):
947
+ resp = self.critic_model.stream_chat(
948
+ prompt,
949
+ sampling_params=params,
950
+ emit_callback=_emit,
951
+ )
952
+ else:
953
+ # Fallback to regular chat with simulated streaming
954
+ resp = self.critic_model.chat(prompt, sampling_params=params, use_tqdm=False)
955
+
956
+ on_end(iteration, model_name)
957
+ return resp[0].outputs[0].text
958
+
959
+ # ---------------------------------------------------------------
960
+
961
+ def _fix_prompt(
962
+ self, question, code, symbols, out, stdout, err, feedback, has_user_feedback=False
963
+ ) -> str:
964
+ """Return the prompt that asks the LLM to fix problems."""
965
+ base_prompt = f"""Please fix the issues with the code and symbols or output "FINISHED".
966
+ The following is the result of evaluating the above code with the extracted symbols.
967
+ ```
968
+ Return value: {out}
969
+ Standard output: {stdout}
970
+ Exceptions: {err}
971
+ ```
972
+
973
+ The following is the summary of issues found with the code or the extracted symbols by another model:
974
+ ```
975
+ {feedback}
976
+ ```
977
+ """
978
+
979
+ if has_user_feedback:
980
+ emphasis = """
981
+ IMPORTANT: The feedback above includes specific user input that you MUST prioritize and address. Pay special attention to any user comments and requirements, as they represent critical guidance from the human user that should take precedence in your solution.
982
+ """
983
+ base_prompt += emphasis
984
+
985
+ base_prompt += """
986
+ If there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.
987
+ If stuck, try outputting different code to solve the problem in a different way.
988
+ You may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.
989
+ If everything is correct, output the word "FINISHED" and nothing else.
990
+ """
991
+ return base_prompt
src/pips/gradio_app.py ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio interface for the PIPS solver.
3
+
4
+ This module provides a lightweight alternative to the Socket.IO web
5
+ application defined in :mod:`pips.web_app`. It exposes a Gradio Blocks
6
+ layout that lets users supply API keys (kept in Gradio state), paste a
7
+ problem description, and optionally upload an image. The back-end uses
8
+ ``PIPSSolver.solve`` so that the same automatic mode selection between
9
+ chain-of-thought and iterative coding is applied.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ from typing import Any, Dict, Iterator, Optional, Tuple
16
+
17
+ import threading
18
+ from queue import Queue, Empty
19
+ import copy
20
+ import os
21
+ import tempfile
22
+ import time
23
+
24
+ SAVED_RUNS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "saved_examples"))
25
+
26
+ try:
27
+ import gradio as gr
28
+ from gradio import update
29
+ except ImportError as exc: # pragma: no cover - handled at runtime
30
+ raise ImportError(
31
+ "Gradio is required to run the PIPS Gradio app. "
32
+ "Install it via `pip install gradio`."
33
+ ) from exc
34
+
35
+ from .core import PIPSSolver
36
+ from .models import AVAILABLE_MODELS, get_model
37
+ from .utils import RawInput
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Helpers
42
+ # ---------------------------------------------------------------------------
43
+
44
+ def _safe(obj: Any) -> Any:
45
+ """Best-effort conversion of solver logs into JSON-serialisable data."""
46
+ if obj is None or isinstance(obj, (str, int, float, bool)):
47
+ return obj
48
+ if isinstance(obj, dict):
49
+ return {str(k): _safe(v) for k, v in obj.items()}
50
+ if isinstance(obj, (list, tuple, set)):
51
+ return [_safe(x) for x in obj]
52
+ return repr(obj)
53
+
54
+
55
+ def _resolve_api_key(model_id: str, keys: Dict[str, str]) -> Optional[str]:
56
+ """Return the correct API key for a model based on its provider prefix."""
57
+ if any(model_id.startswith(prefix) for prefix in ("gpt", "o3", "o4")):
58
+ return keys.get("openai") or None
59
+ if "gemini" in model_id:
60
+ return keys.get("google") or None
61
+ if "claude" in model_id:
62
+ return keys.get("anthropic") or None
63
+ return None
64
+
65
+
66
+ def _update_api_keys(openai_key: str, google_key: str, anthropic_key: str, state: Dict[str, str] | None):
67
+ """Update the in-memory API key state."""
68
+ new_state = dict(state or {})
69
+ if openai_key.strip():
70
+ new_state["openai"] = openai_key.strip()
71
+ if google_key.strip():
72
+ new_state["google"] = google_key.strip()
73
+ if anthropic_key.strip():
74
+ new_state["anthropic"] = anthropic_key.strip()
75
+ message = "API keys updated in local session state."
76
+ if not any([openai_key.strip(), google_key.strip(), anthropic_key.strip()]):
77
+ message = "Cleared API keys from local session state."
78
+ new_state = {}
79
+ return new_state, message
80
+
81
+
82
+ PREPOPULATED_EXAMPLES: Dict[str, Dict[str, Any]] = {
83
+ "iterative_coding": {
84
+ "name": "Demo: Iterative Coding (Factorial)",
85
+ "problem": "Calculate the factorial of 6 using Python code and explain the method.",
86
+ "history": [
87
+ {
88
+ "role": "user",
89
+ "content": "Calculate the factorial of 6 using Python code and explain the method.",
90
+ "metadata": {"component": "user", "title": "User"},
91
+ },
92
+ {
93
+ "role": "assistant",
94
+ "content": (
95
+ "```json\n{\n \"n\": 6\n}\n```\n\n"
96
+ "```python\ndef solve(symbols):\n n = symbols['n']\n result = 1\n for i in range(2, n + 1):\n result *= i\n return result\n```"
97
+ ),
98
+ "metadata": {"component": "solver", "title": "🧠 Solver (iteration 0) · Demo Model"},
99
+ },
100
+ {
101
+ "role": "assistant",
102
+ "content": "Mode chosen: Iterative coding",
103
+ "metadata": {"component": "mode_result", "title": "Mode Choice"},
104
+ },
105
+ {
106
+ "role": "assistant",
107
+ "content": "**Final Answer:** 720\n\n**Method:** Iterative coding",
108
+ "metadata": {"component": "summary", "title": "Summary"},
109
+ },
110
+ ],
111
+ "symbols": {"n": 6},
112
+ "code": "def solve(symbols):\n n = symbols['n']\n result = 1\n for i in range(2, n + 1):\n result *= i\n return result",
113
+ "status": "Demo example: iterative coding (precomputed).",
114
+ },
115
+ "chain_of_thought": {
116
+ "name": "Demo: Chain-of-Thought (Word Problem)",
117
+ "problem": "John has 3 apples and buys 4 more. He then gives 2 to a friend. How many apples does he have now?",
118
+ "history": [
119
+ {
120
+ "role": "user",
121
+ "content": "John has 3 apples and buys 4 more. He then gives 2 to a friend. How many apples does he have now?",
122
+ "metadata": {"component": "user", "title": "User"},
123
+ },
124
+ {
125
+ "role": "assistant",
126
+ "content": "John starts with 3 apples. After buying 4 more, he has 3 + 4 = 7 apples. Giving away 2 leaves 5 apples.",
127
+ "metadata": {"component": "solver", "title": "🧠 Solver (reasoning)"},
128
+ },
129
+ {
130
+ "role": "assistant",
131
+ "content": "Mode chosen: Chain-of-thought reasoning",
132
+ "metadata": {"component": "mode_result", "title": "Mode Choice"},
133
+ },
134
+ {
135
+ "role": "assistant",
136
+ "content": "**Final Answer:** 5\n\n**Method:** Chain-of-thought reasoning",
137
+ "metadata": {"component": "summary", "title": "Summary"},
138
+ },
139
+ ],
140
+ "symbols": None,
141
+ "code": "",
142
+ "status": "Demo example: chain-of-thought reasoning (precomputed).",
143
+ },
144
+ }
145
+
146
+ # Override with streamlined demo definitions
147
+ PREPOPULATED_EXAMPLES = {
148
+ "iterative_coding": {
149
+ "name": "Demo: Iterative Coding (Factorial)",
150
+ "problem": "Calculate the factorial of 6 using Python code and explain the method.",
151
+ "history": [
152
+ {
153
+ "role": "user",
154
+ "content": "Calculate the factorial of 6 using Python code and explain the method.",
155
+ "metadata": {"component": "user", "title": "User"},
156
+ },
157
+ {
158
+ "role": "assistant",
159
+ "content": (
160
+ "```json\n{\n \"n\": 6\n}\n```\n\n"
161
+ "```python\ndef solve(symbols):\n n = symbols['n']\n result = 1\n for i in range(2, n + 1):\n result *= i\n return result\n```"
162
+ ),
163
+ "metadata": {"component": "solver", "title": "🧠 Solver (iteration 0) · Demo Model"},
164
+ },
165
+ {
166
+ "role": "assistant",
167
+ "content": "Mode chosen: Iterative coding",
168
+ "metadata": {"component": "mode_result", "title": "Mode Choice"},
169
+ },
170
+ {
171
+ "role": "assistant",
172
+ "content": "**Final Answer:** 720\n\n**Method:** Iterative coding",
173
+ "metadata": {"component": "summary", "title": "Summary"},
174
+ },
175
+ ],
176
+ "symbols": {"n": 6},
177
+ "code": "def solve(symbols):\n n = symbols['n']\n result = 1\n for i in range(2, n + 1):\n result *= i\n return result",
178
+ "status": "Demo example: iterative coding (precomputed).",
179
+ "method": "Iterative coding",
180
+ "decision": {"use_code": True},
181
+ },
182
+ "chain_of_thought": {
183
+ "name": "Demo: Chain-of-Thought (Word Problem)",
184
+ "problem": "John has 3 apples and buys 4 more. He then gives 2 to a friend. How many apples does he have now?",
185
+ "history": [
186
+ {
187
+ "role": "user",
188
+ "content": "John has 3 apples and buys 4 more. He then gives 2 to a friend. How many apples does he have now?",
189
+ "metadata": {"component": "user", "title": "User"},
190
+ },
191
+ {
192
+ "role": "assistant",
193
+ "content": "John starts with 3 apples. After buying 4 more, he has 7 apples. Giving 2 away leaves 5 apples.",
194
+ "metadata": {"component": "solver", "title": "🧠 Solver (reasoning)"},
195
+ },
196
+ {
197
+ "role": "assistant",
198
+ "content": "Mode chosen: Chain-of-thought reasoning",
199
+ "metadata": {"component": "mode_result", "title": "Mode Choice"},
200
+ },
201
+ {
202
+ "role": "assistant",
203
+ "content": "**Final Answer:** 5\n\n**Method:** Chain-of-thought reasoning",
204
+ "metadata": {"component": "summary", "title": "Summary"},
205
+ },
206
+ ],
207
+ "symbols": None,
208
+ "code": "",
209
+ "status": "Demo example: chain-of-thought reasoning (precomputed).",
210
+ "method": "Chain-of-thought reasoning",
211
+ "decision": {"use_code": False},
212
+ },
213
+ }
214
+
215
+
216
+ def _example_choices() -> list[tuple[str, str]]:
217
+ choices = [(key, data["name"]) for key, data in PREPOPULATED_EXAMPLES.items()]
218
+ choices.insert(0, ("", "Select a demo example"))
219
+ return choices
220
+
221
+
222
+ def _saved_run_choices() -> list[tuple[str, str]]:
223
+ """Return available saved run files as dropdown choices."""
224
+ choices: list[tuple[str, str]] = [("", "Select a saved run")]
225
+ if os.path.isdir(SAVED_RUNS_DIR):
226
+ for name in sorted(os.listdir(SAVED_RUNS_DIR)):
227
+ if name.lower().endswith(".json"):
228
+ path = os.path.join(SAVED_RUNS_DIR, name)
229
+ choices.append((name.split(".")[0], name))
230
+ return choices
231
+
232
+
233
+ def _extract_problem_from_history(history: Any) -> str:
234
+ """Take the first user message content from a conversation history."""
235
+ if not isinstance(history, list):
236
+ return ""
237
+ for message in history:
238
+ if isinstance(message, dict) and message.get("role") == "user":
239
+ content = message.get("content")
240
+ if isinstance(content, str):
241
+ return content
242
+ return ""
243
+
244
+
245
+ def _fill_example_problem(example_key: str):
246
+ example = PREPOPULATED_EXAMPLES.get(example_key)
247
+ if not example:
248
+ return update()
249
+ return update(value=example["problem"])
250
+
251
+
252
+ def _preview_example(example_key: str):
253
+ example = PREPOPULATED_EXAMPLES.get(example_key)
254
+ if not example:
255
+ return update(), update(), update(), update(), update(value="Select a demo example to preview."), {}
256
+
257
+ history = copy.deepcopy(example["history"])
258
+ symbols = example.get("symbols")
259
+ code = example.get("code", "")
260
+ status = example.get("status", "Demo example")
261
+ method = example.get("method", "")
262
+ decision = example.get("decision")
263
+
264
+ symbols_update = update(value=symbols, visible=symbols is not None)
265
+ code_update = update(value=code, visible=bool(code))
266
+
267
+ record = {
268
+ "problem": example["problem"],
269
+ "history": history,
270
+ "symbols": _safe(symbols),
271
+ "code": code,
272
+ "status": status,
273
+ "method": method,
274
+ "decision": _safe(decision),
275
+ "steps": [],
276
+ "timestamp": time.time(),
277
+ }
278
+
279
+ status_update = update(value=status)
280
+
281
+ return history, update(value=example["problem"]), symbols_update, code_update, status_update, record
282
+
283
+
284
+ def _load_saved_run(file_path: Optional[str]):
285
+ """Load a saved solver run from a JSON export."""
286
+ if file_path is None:
287
+ raise gr.Error("Select a saved run first.")
288
+
289
+ if isinstance(file_path, list):
290
+ if not file_path:
291
+ raise gr.Error("Select a saved run first.")
292
+ file_path = file_path[0]
293
+
294
+ if not isinstance(file_path, str):
295
+ raise gr.Error("Invalid saved run selection.")
296
+
297
+ file_path = file_path.strip()
298
+ if not file_path:
299
+ raise gr.Error("Select a saved run first.")
300
+
301
+ abs_path = os.path.abspath(SAVED_RUNS_DIR + "/" + file_path)
302
+ saved_dir = os.path.abspath(SAVED_RUNS_DIR)
303
+ try:
304
+ if os.path.commonpath([abs_path, saved_dir]) != saved_dir:
305
+ raise gr.Error("Saved run must be located in the saved examples directory.")
306
+ except ValueError as exc: # pragma: no cover - platform dependent
307
+ raise gr.Error("Saved run must be located in the saved examples directory.")
308
+
309
+ if not os.path.isfile(abs_path):
310
+ raise gr.Error(f"Saved run not found: {abs_path}")
311
+
312
+ try:
313
+ with open(abs_path, "r", encoding="utf-8") as handle:
314
+ data = json.load(handle)
315
+ except FileNotFoundError as exc:
316
+ raise gr.Error(f"Could not read saved run: {abs_path}") from exc
317
+ except json.JSONDecodeError as exc:
318
+ raise gr.Error(f"Saved run is not valid JSON: {exc}") from exc
319
+ except OSError as exc: # pragma: no cover - depends on filesystem
320
+ raise gr.Error(f"Failed to read saved run: {exc}") from exc
321
+
322
+ history = data.get("history")
323
+ if not isinstance(history, list):
324
+ raise gr.Error("Saved run JSON must include a `history` list.")
325
+
326
+ history_copy = copy.deepcopy(history)
327
+ symbols = data.get("symbols")
328
+ code = data.get("code", "")
329
+ status = data.get("status", "Loaded saved run.")
330
+ method = data.get("method", "")
331
+ decision = data.get("decision")
332
+ problem = _extract_problem_from_history(history_copy) or data.get("problem", "")
333
+ steps = data.get("steps", [])
334
+ timestamp = data.get("timestamp", time.time())
335
+
336
+ symbols_visible = symbols is not None
337
+ symbols_value = _safe(symbols) if symbols_visible else None
338
+ symbols_update = update(value=symbols_value, visible=symbols_visible)
339
+
340
+ code_visible = bool(code)
341
+ code_update = update(value=code if code_visible else "", visible=code_visible)
342
+
343
+ record = {
344
+ "problem": problem,
345
+ "history": history_copy,
346
+ "symbols": _safe(symbols),
347
+ "code": code,
348
+ "status": status,
349
+ "method": method,
350
+ "decision": _safe(decision),
351
+ "steps": _safe(steps),
352
+ "timestamp": timestamp,
353
+ }
354
+
355
+ status_update = update(value=status)
356
+
357
+ return (
358
+ history_copy,
359
+ update(value=problem),
360
+ symbols_update,
361
+ code_update,
362
+ status_update,
363
+ record,
364
+ )
365
+
366
+
367
+ def _refresh_saved_runs():
368
+ """Refresh saved run dropdown choices."""
369
+ return update(choices=_saved_run_choices())
370
+
371
+
372
+ def _download_run(run_state: Optional[Dict[str, Any]]):
373
+ if not run_state:
374
+ raise gr.Error("Run the solver or preview a demo example first.")
375
+
376
+ # fd, path = tempfile.mkstemp(prefix="pips_run_", suffix=".json")
377
+ # save to saved_examples
378
+ if not os.path.isdir(SAVED_RUNS_DIR):
379
+ os.makedirs(SAVED_RUNS_DIR, exist_ok=True)
380
+ path = os.path.join(SAVED_RUNS_DIR, f"pips_run_{int(time.time())}.json")
381
+ with open(path, "w", encoding="utf-8") as handle:
382
+ json.dump(run_state, handle, indent=2)
383
+ return update(value=path, visible=True)
384
+
385
+
386
+ def _stream_solver(
387
+ problem_text: str,
388
+ image,
389
+ generator_model_id: str,
390
+ critic_model_id: str,
391
+ max_iterations: int,
392
+ temperature: float,
393
+ max_tokens: int,
394
+ max_execution_time: int,
395
+ api_keys_state: Dict[str, str] | None,
396
+ previous_state: Optional[Dict[str, Any]] = None,
397
+ ) -> Iterator[Tuple[list[Dict[str, Any]], Any, Any, Any, str, Optional[Dict[str, Any]]]]:
398
+ """Stream solver progress to the Gradio Chatbot."""
399
+ text = (problem_text or "").strip()
400
+ last_state = previous_state
401
+
402
+ if not text:
403
+ history = [
404
+ {
405
+ "role": "assistant",
406
+ "content": "❌ Please provide a problem statement before solving.",
407
+ "metadata": {"component": "status", "title": "Status"},
408
+ },
409
+ ]
410
+ status = "❌ Problem text missing."
411
+
412
+ yield (
413
+ history,
414
+ update(),
415
+ update(value=None, visible=False),
416
+ update(value="", visible=False),
417
+ status,
418
+ last_state,
419
+ )
420
+ return
421
+
422
+ keys = api_keys_state or {}
423
+ generator_api_key = _resolve_api_key(generator_model_id, keys)
424
+ critic_api_key = _resolve_api_key(critic_model_id, keys)
425
+
426
+ history: list[Dict[str, Any]] = [
427
+ {
428
+ "role": "user",
429
+ "content": text,
430
+ "metadata": {"component": "user", "title": "User"},
431
+ }
432
+ ]
433
+ symbols_output: Optional[Dict[str, Any]] = None
434
+ code_output = ""
435
+ status = "🔄 Preparing solver..."
436
+
437
+ def emit(state_override: Optional[Dict[str, Any]] = None):
438
+ nonlocal last_state
439
+ if symbols_output is not None:
440
+ symbols_update = update(value=symbols_output, visible=True)
441
+ code_visible = bool(code_output)
442
+ code_update = update(value=code_output if code_visible else "", visible=code_visible)
443
+ else:
444
+ symbols_update = update(value=None, visible=False)
445
+ code_update = update(value="", visible=False)
446
+
447
+ state_value = last_state
448
+ if state_override is not None:
449
+ last_state = state_override
450
+ state_value = state_override
451
+
452
+ return (
453
+ history,
454
+ update(),
455
+ symbols_update,
456
+ code_update,
457
+ status,
458
+ state_value,
459
+ )
460
+
461
+ yield emit()
462
+
463
+ if not generator_api_key:
464
+ error_msg = f"❌ Missing API key for generator model `{generator_model_id}`."
465
+ status = error_msg
466
+ symbols_output = None
467
+ code_output = ""
468
+ yield emit()
469
+ return
470
+
471
+ try:
472
+ generator_model = get_model(generator_model_id, generator_api_key)
473
+ except Exception as exc: # pragma: no cover - depends on SDK
474
+ error_msg = f"❌ Failed to initialise generator model `{generator_model_id}`: {exc}"
475
+ status = error_msg
476
+ symbols_output = None
477
+ code_output = ""
478
+ yield emit()
479
+ return
480
+
481
+ critic_model = generator_model
482
+ if critic_model_id != generator_model_id and critic_api_key:
483
+ try:
484
+ critic_model = get_model(critic_model_id, critic_api_key)
485
+ except Exception as exc: # pragma: no cover
486
+ error_msg = f"❌ Failed to initialise critic model `{critic_model_id}`: {exc}"
487
+ status = error_msg
488
+ symbols_output = None
489
+ code_output = ""
490
+ yield emit()
491
+ return
492
+
493
+ events: "Queue[Tuple[str, Any]]" = Queue()
494
+ active_messages: Dict[Tuple[str, int], int] = {}
495
+ last_status: Optional[str] = None
496
+ mode_selection_index: Optional[int] = None
497
+
498
+ def push(event: str, payload: Any):
499
+ events.put((event, payload))
500
+
501
+ steps: list[Dict[str, Any]] = []
502
+ current_response: str = ""
503
+
504
+ def on_step_update(step, message, iteration=None, prompt_details=None, **_):
505
+ steps.append(
506
+ {
507
+ "step": step,
508
+ "message": message,
509
+ "iteration": iteration,
510
+ "prompt_details": _safe(prompt_details),
511
+ }
512
+ )
513
+ push("status", {"text": message, "step": step})
514
+
515
+ def on_llm_streaming_start(iteration, model_name):
516
+ push("solver_start", {"iteration": iteration, "model": model_name})
517
+
518
+ def on_llm_streaming_token(token, iteration, model_name):
519
+ push("solver_token", {"token": token, "iteration": iteration, "model": model_name})
520
+
521
+ def on_llm_streaming_end(iteration, model_name):
522
+ push("status", {"text": f"Completed solver response from {model_name} (iteration {iteration}).", "step": "solver_end"})
523
+
524
+ def on_code_check_streaming_start(iteration, model_name):
525
+ push("critic_start", {"iteration": iteration, "model": model_name})
526
+
527
+ def on_code_check_streaming_token(token, iteration, model_name):
528
+ push("critic_token", {"token": token, "iteration": iteration, "model": model_name})
529
+
530
+ def on_code_check_streaming_end(iteration, model_name):
531
+ push("status", {"text": f"Completed critic feedback from {model_name} (iteration {iteration}).", "step": "critic_end"})
532
+
533
+ callbacks = dict(
534
+ on_step_update=on_step_update,
535
+ on_llm_streaming_start=on_llm_streaming_start,
536
+ on_llm_streaming_token=on_llm_streaming_token,
537
+ on_llm_streaming_end=on_llm_streaming_end,
538
+ on_code_check_streaming_start=on_code_check_streaming_start,
539
+ on_code_check_streaming_token=on_code_check_streaming_token,
540
+ on_code_check_streaming_end=on_code_check_streaming_end,
541
+ check_interrupted=lambda: False,
542
+ get_max_execution_time=lambda: max_execution_time,
543
+ )
544
+
545
+ solver = PIPSSolver(
546
+ generator_model,
547
+ max_iterations=max_iterations,
548
+ temperature=temperature,
549
+ max_tokens=max_tokens,
550
+ interactive=False,
551
+ critic_model=critic_model,
552
+ )
553
+
554
+ sample = RawInput(text_input=problem_text, image_input=image)
555
+
556
+ def worker():
557
+ try:
558
+ answer, logs, decision = solver.solve(
559
+ sample,
560
+ stream=True,
561
+ callbacks=callbacks,
562
+ additional_rules="",
563
+ decision_max_tokens=min(1024, max_tokens),
564
+ interactive_requested=False,
565
+ )
566
+ events.put(("final", (answer, logs, decision)))
567
+ except Exception as exc: # pragma: no cover
568
+ events.put(("error", str(exc)))
569
+ finally:
570
+ events.put(("done", None))
571
+
572
+ thread = threading.Thread(target=worker, daemon=True)
573
+ thread.start()
574
+
575
+ try:
576
+ while True:
577
+ event, payload = events.get()
578
+
579
+ if event == "status":
580
+ if isinstance(payload, dict):
581
+ text = payload.get("text") or ""
582
+ step_name = payload.get("step")
583
+ else:
584
+ text = str(payload)
585
+ step_name = None
586
+
587
+ status = text
588
+
589
+ if step_name == "mode_selection":
590
+ if text:
591
+ history.append({
592
+ "role": "assistant",
593
+ "content": text,
594
+ "metadata": {"component": "mode_selection", "title": "Mode Selection"},
595
+ })
596
+ mode_selection_index = len(history) - 1
597
+ last_status = text
598
+ yield emit()
599
+ else:
600
+ last_status = text
601
+ yield emit()
602
+
603
+ elif event == "solver_start":
604
+ iteration = payload.get("iteration")
605
+ model = payload.get("model", "Solver")
606
+ label = f"🧠 Solver (iteration {iteration}) · {model}"
607
+ history.append({
608
+ "role": "assistant",
609
+ "content": "",
610
+ "metadata": {"component": "solver", "title": label},
611
+ })
612
+ idx = len(history) - 1
613
+ active_messages[("solver", iteration)] = idx
614
+ current_response = ""
615
+ yield emit()
616
+
617
+ elif event == "solver_token":
618
+ iteration = payload.get("iteration")
619
+ token = payload.get("token", "")
620
+ model_name = payload.get("model", "Solver")
621
+ idx = active_messages.get(("solver", iteration))
622
+ if idx is not None:
623
+ entry = history[idx]
624
+ entry["content"] += token
625
+ else:
626
+ entry = {
627
+ "role": "assistant",
628
+ "content": token,
629
+ "metadata": {"component": "solver", "title": f"🧠 Solver (iteration {iteration}) · {model_name}"},
630
+ }
631
+ history.append(entry)
632
+ idx = len(history) - 1
633
+ active_messages[("solver", iteration)] = idx
634
+ current_response = history[idx]["content"]
635
+ yield emit()
636
+
637
+ elif event == "critic_start":
638
+ iteration = payload.get("iteration")
639
+ model = payload.get("model", "Critic")
640
+ label = f"🧾 Critic (iteration {iteration}) · {model}"
641
+ history.append({
642
+ "role": "assistant",
643
+ "content": "",
644
+ "metadata": {"component": "critic", "title": label},
645
+ })
646
+ idx = len(history) - 1
647
+ active_messages[("critic", iteration)] = idx
648
+ yield emit()
649
+
650
+ elif event == "critic_token":
651
+ iteration = payload.get("iteration")
652
+ token = payload.get("token", "")
653
+ model_name = payload.get("model", "Critic")
654
+ idx = active_messages.get(("critic", iteration))
655
+ if idx is not None:
656
+ history[idx]["content"] += token
657
+ else:
658
+ entry = {
659
+ "role": "assistant",
660
+ "content": token,
661
+ "metadata": {"component": "critic", "title": f"🧾 Critic (iteration {iteration}) · {model_name}"},
662
+ }
663
+ history.append(entry)
664
+ idx = len(history) - 1
665
+ active_messages[("critic", iteration)] = idx
666
+ yield emit()
667
+
668
+ elif event == "error":
669
+ status = f"❌ Solver error: {payload}"
670
+ history.append({
671
+ "role": "assistant",
672
+ "content": status,
673
+ "metadata": {"component": "error", "title": "Error"},
674
+ })
675
+ yield emit()
676
+
677
+ elif event == "final":
678
+ final_answer, logs, decision = payload
679
+
680
+ if not isinstance(logs, dict) or logs is None:
681
+ logs = {}
682
+ logs.setdefault("steps", steps)
683
+
684
+ use_code = decision.get("use_code") if isinstance(decision, dict) else False
685
+
686
+ symbols_output = None
687
+ code_output = ""
688
+
689
+ method_label = "Iterative coding" if use_code else "Chain-of-thought reasoning"
690
+
691
+ if use_code:
692
+ symbols = logs.get("all_symbols") or []
693
+ programs = logs.get("all_programs") or []
694
+ if symbols:
695
+ symbols_output = _safe(symbols[-1])
696
+ if programs:
697
+ code_output = programs[-1] or ""
698
+ status = "✅ Completed (iterative coding)."
699
+ else:
700
+ symbols_output = None
701
+ code_output = ""
702
+ status = "✅ Completed (chain-of-thought)."
703
+
704
+ mode_choice_entry = {
705
+ "role": "assistant",
706
+ "content": f"Mode chosen: {method_label}",
707
+ "metadata": {"component": "mode_result", "title": "Mode Choice"},
708
+ }
709
+ if mode_selection_index is not None:
710
+ history.insert(mode_selection_index + 1, mode_choice_entry)
711
+ else:
712
+ history.append(mode_choice_entry)
713
+
714
+ summary_text = final_answer or ""
715
+ if not summary_text:
716
+ summary_text = status
717
+ summary_text = f"**Final Answer:** {summary_text}\n\n**Method:** {method_label}"
718
+ history.append({
719
+ "role": "assistant",
720
+ "content": summary_text,
721
+ "metadata": {"component": "summary", "title": "Summary"},
722
+ })
723
+
724
+ run_record = {
725
+ "problem": text,
726
+ "history": copy.deepcopy(history),
727
+ "symbols": _safe(symbols_output),
728
+ "code": code_output,
729
+ "status": status,
730
+ "method": method_label,
731
+ "decision": _safe(decision),
732
+ "steps": _safe(steps),
733
+ "timestamp": time.time(),
734
+ }
735
+
736
+ yield emit(run_record)
737
+
738
+ elif event == "done":
739
+ break
740
+
741
+ finally:
742
+ # Drain any remaining events to avoid dangling threads.
743
+ while True:
744
+ try:
745
+ events.get_nowait()
746
+ except Empty:
747
+ break
748
+
749
+
750
+ # ---------------------------------------------------------------------------
751
+ # Public interface
752
+ # ---------------------------------------------------------------------------
753
+
754
+ def build_blocks() -> gr.Blocks:
755
+ """Construct the Gradio Blocks layout."""
756
+ with gr.Blocks() as demo:
757
+ gr.Markdown(
758
+ """
759
+ ## PIPS
760
+ Automatically chooses between chain-of-thought reasoning and program synthesis for each input.
761
+ """
762
+ )
763
+
764
+ api_state = gr.State({})
765
+ run_state = gr.State({})
766
+
767
+ with gr.Row(equal_height=True):
768
+ with gr.Column(scale=5):
769
+ gr.Markdown("### API Keys")
770
+ with gr.Row():
771
+ openai_key = gr.Textbox(label="OpenAI", type="password", placeholder="sk-...")
772
+ google_key = gr.Textbox(label="Google", type="password", placeholder="AIza...")
773
+ anthropic_key = gr.Textbox(label="Anthropic", type="password", placeholder="sk-ant-...")
774
+ update_message = gr.Markdown("")
775
+ update_btn = gr.Button("Save Keys", variant="secondary")
776
+ update_btn.click(
777
+ fn=_update_api_keys,
778
+ inputs=[openai_key, google_key, anthropic_key, api_state],
779
+ outputs=[api_state, update_message],
780
+ queue=False,
781
+ )
782
+
783
+ # gr.Markdown("### Demo Examples")
784
+ # example_dropdown = gr.Dropdown(
785
+ # choices=_example_choices(),
786
+ # value="",
787
+ # label="Choose a demo example",
788
+ # )
789
+ # with gr.Row():
790
+ # preview_btn = gr.Button("Preview Example", variant="secondary")
791
+
792
+ gr.Markdown("### Examples")
793
+ with gr.Row():
794
+ saved_run_dropdown = gr.Dropdown(
795
+ choices=_saved_run_choices(),
796
+ value="",
797
+ label="Example",
798
+ interactive=True,
799
+ )
800
+ # refresh_saved_runs_btn = gr.Button("Refresh", variant="secondary")
801
+ load_btn = gr.Button("Load Example", variant="secondary")
802
+
803
+ gr.Markdown("### Problem")
804
+ problem = gr.Textbox(
805
+ label="Problem Description",
806
+ lines=10,
807
+ placeholder="Describe the task you want PIPS to solve.",
808
+ )
809
+ image = gr.Image(label="Optional Image", type="pil")
810
+
811
+ gr.Markdown("### Models & Limits")
812
+ generator_model = gr.Dropdown(
813
+ choices=list(AVAILABLE_MODELS.keys()),
814
+ value=next(iter(AVAILABLE_MODELS)),
815
+ label="Generator Model",
816
+ interactive=True,
817
+ )
818
+ critic_model = gr.Dropdown(
819
+ choices=list(AVAILABLE_MODELS.keys()),
820
+ value=next(iter(AVAILABLE_MODELS)),
821
+ label="Critic Model",
822
+ interactive=True,
823
+ )
824
+
825
+ with gr.Row():
826
+ max_iterations = gr.Slider(1, 15, value=8, step=1, label="Iterations")
827
+ temperature = gr.Slider(0.0, 2.0, value=0.0, step=0.1, label="Temperature")
828
+
829
+ with gr.Row():
830
+ max_tokens = gr.Slider(512, 8192, value=4096, step=256, label="Max Tokens")
831
+ max_exec_time = gr.Slider(1, 60, value=10, step=1, label="Exec Timeout (s)")
832
+
833
+ solve_button = gr.Button("Solve", variant="primary")
834
+
835
+ status_md = gr.Markdown(value="Ready to solve.", label="Status")
836
+ symbols_json = gr.JSON(label="Symbols (iterative coding)", visible=False)
837
+ code_output = gr.Code(label="Final Program", language="python", visible=False)
838
+ # download_btn = gr.Button("Download Last Run", variant="secondary")
839
+ download_file = gr.File(label="Run Export", visible=False)
840
+
841
+ with gr.Column(scale=7):
842
+ chatbot = gr.Chatbot(
843
+ label="Solver Log",
844
+ type="messages",
845
+ height=550,
846
+ )
847
+
848
+ solve_button.click(
849
+ fn=_stream_solver,
850
+ inputs=[
851
+ problem,
852
+ image,
853
+ generator_model,
854
+ critic_model,
855
+ max_iterations,
856
+ temperature,
857
+ max_tokens,
858
+ max_exec_time,
859
+ api_state,
860
+ run_state,
861
+ ],
862
+ outputs=[chatbot, problem, symbols_json, code_output, status_md, run_state],
863
+ queue=True,
864
+ )
865
+
866
+ # example_dropdown.change(
867
+ # fn=_fill_example_problem,
868
+ # inputs=[example_dropdown],
869
+ # outputs=[problem],
870
+ # )
871
+
872
+ # preview_btn.click(
873
+ # fn=_preview_example,
874
+ # inputs=[example_dropdown],
875
+ # outputs=[chatbot, problem, symbols_json, code_output, status_md, run_state],
876
+ # queue=False,
877
+ # )
878
+
879
+ load_btn.click(
880
+ fn=_load_saved_run,
881
+ inputs=[saved_run_dropdown],
882
+ outputs=[chatbot, problem, symbols_json, code_output, status_md, run_state],
883
+ queue=False,
884
+ )
885
+
886
+ # refresh_saved_runs_btn.click(
887
+ # fn=_refresh_saved_runs,
888
+ # outputs=[saved_run_dropdown],
889
+ # queue=False,
890
+ # )
891
+
892
+ # download_btn.click(
893
+ # fn=_download_run,
894
+ # inputs=[run_state],
895
+ # outputs=[download_file],
896
+ # queue=False,
897
+ # )
898
+
899
+ return demo
900
+
901
+
902
+ def launch(**kwargs): # pragma: no cover - thin wrapper
903
+ """Launch the Gradio interface."""
904
+ return build_blocks().launch(**kwargs)
905
+
906
+
907
+ __all__ = ["build_blocks", "launch"]
908
+
909
+
910
+ if __name__ == "__main__": # pragma: no cover
911
+ launch()
src/pips/model_registry.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model registry for PIPS - centralized model management.
3
+
4
+ This module provides a pluggable model registry that makes it easy to add
5
+ new models from different providers without modifying the core codebase.
6
+ """
7
+
8
+ from typing import Dict, Any, Optional
9
+
10
+ # Internal registry storage
11
+ _registry: Dict[str, Dict[str, Any]] = {}
12
+
13
+ def register_model(name: str, provider: str, display: str = "", **config):
14
+ """
15
+ Register a new model in the registry.
16
+
17
+ Args:
18
+ name: Unique model identifier
19
+ provider: Provider name (openai, google, anthropic)
20
+ display: Human-readable display name
21
+ **config: Additional configuration parameters
22
+ """
23
+ _registry[name] = {
24
+ "provider": provider,
25
+ "display": display or name,
26
+ **config
27
+ }
28
+
29
+ def list_models() -> Dict[str, Dict[str, Any]]:
30
+ """
31
+ Get all registered models.
32
+
33
+ Returns:
34
+ Dictionary mapping model names to their configuration
35
+ """
36
+ return _registry.copy()
37
+
38
+ def get_model_config(name: str) -> Optional[Dict[str, Any]]:
39
+ """
40
+ Get configuration for a specific model.
41
+
42
+ Args:
43
+ name: Model identifier
44
+
45
+ Returns:
46
+ Model configuration or None if not found
47
+ """
48
+ return _registry.get(name)
49
+
50
+ def get_available_models() -> Dict[str, str]:
51
+ """
52
+ Get available models in the format expected by the UI.
53
+
54
+ Returns:
55
+ Dictionary mapping model IDs to display names
56
+ """
57
+ return {name: config["display"] for name, config in _registry.items()}
58
+
59
+ # Initialize with default models
60
+ def _initialize_default_models():
61
+ """Initialize the registry with default models."""
62
+
63
+ # OpenAI Models
64
+ register_model("gpt-4.1-2025-04-14", "openai", "OpenAI GPT-4.1")
65
+ register_model("gpt-4o-2024-08-06", "openai", "OpenAI GPT-4o")
66
+ register_model("gpt-4.1-mini-2025-04-14", "openai", "OpenAI GPT-4.1 Mini")
67
+ register_model("gpt-4o-mini", "openai", "OpenAI GPT-4o Mini")
68
+ register_model("o4-mini-2025-04-16", "openai", "OpenAI o4 Mini")
69
+ register_model("o3-2025-04-16", "openai", "OpenAI o3")
70
+
71
+ # Google Models
72
+ register_model("gemini-2.0-flash", "google", "Google Gemini 2.0 Flash")
73
+ register_model("gemini-2.0-flash-codeinterpreter", "google", "Google Gemini 2.0 Flash (Code Interpreter)")
74
+
75
+ # Anthropic Models
76
+ register_model("claude-sonnet-4-20250514", "anthropic", "Anthropic Claude 4 Sonnet")
77
+ register_model("claude-opus-4-20250514", "anthropic", "Anthropic Claude 4 Opus")
78
+ register_model("claude-3-5-haiku-latest", "anthropic", "Anthropic Claude 3.5 Haiku")
79
+
80
+ # Initialize default models when module is imported
81
+ _initialize_default_models()
src/pips/models.py ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM model interfaces for PIPS.
3
+
4
+ This module provides a unified interface for various LLM providers including
5
+ OpenAI, Google Gemini, and Anthropic Claude models.
6
+ """
7
+
8
+ import os
9
+ import time
10
+ import json
11
+ import re
12
+ from openai import OpenAI
13
+ from typing import List, Dict, Any, Optional
14
+
15
+ try:
16
+ import anthropic
17
+ except ImportError:
18
+ anthropic = None
19
+
20
+ try:
21
+ from google import genai
22
+ from google.genai import types
23
+ except ImportError:
24
+ genai = None
25
+ types = None
26
+
27
+ from .utils import RawInput, img2base64, base642img
28
+
29
+
30
+ class SamplingParams:
31
+ """
32
+ Sampling parameters for LLM generation.
33
+
34
+ Args:
35
+ temperature (float): Sampling temperature (0.0 to 2.0)
36
+ max_tokens (int): Maximum number of tokens to generate
37
+ top_p (float): Nucleus sampling parameter
38
+ n (int): Number of completions to generate
39
+ stop (list): List of stop sequences
40
+ """
41
+ def __init__(self, temperature=0.0, max_tokens=4096, top_p=0.9, n=1, stop=None):
42
+ self.temperature = temperature
43
+ self.max_tokens = max_tokens
44
+ self.top_p = top_p
45
+ self.n = n
46
+ self.stop = stop
47
+
48
+
49
+ class LLMModel:
50
+ """
51
+ Base class for LLM models.
52
+
53
+ Provides a common interface for all LLM providers with lazy initialization
54
+ and both regular and streaming chat capabilities.
55
+ """
56
+
57
+ def __init__(self, model_name: str):
58
+ self.model_name = model_name
59
+ self._client = None
60
+ self._initialized = False
61
+
62
+ def _ensure_initialized(self):
63
+ """Ensure the model client is initialized before use."""
64
+ if not self._initialized:
65
+ self._initialize_client()
66
+ self._initialized = True
67
+
68
+ def _initialize_client(self):
69
+ """Initialize the client - to be implemented by subclasses."""
70
+ raise NotImplementedError
71
+
72
+ def chat(self, prompt: List[Dict], sampling_params: SamplingParams, use_tqdm=False):
73
+ """
74
+ Generate response using the model.
75
+
76
+ Args:
77
+ prompt: List of message dictionaries in OpenAI format
78
+ sampling_params: Sampling configuration
79
+ use_tqdm: Whether to show progress bar (unused in base implementation)
80
+
81
+ Returns:
82
+ List containing Outputs object with generated text
83
+ """
84
+ self._ensure_initialized()
85
+ return self._chat_impl(prompt, sampling_params, use_tqdm)
86
+
87
+ def _chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, use_tqdm=False):
88
+ """Actual chat implementation - to be implemented by subclasses."""
89
+ raise NotImplementedError
90
+
91
+ def stream_chat(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
92
+ """
93
+ Stream response using the model with callback for each token.
94
+
95
+ Default implementation falls back to regular chat with simulated streaming.
96
+
97
+ Args:
98
+ prompt: List of message dictionaries in OpenAI format
99
+ sampling_params: Sampling configuration
100
+ emit_callback: Function to call for each generated token
101
+ interrupted_callback: Function to check if streaming should be interrupted
102
+
103
+ Returns:
104
+ List containing Outputs object with generated text
105
+ """
106
+ # Get the full response
107
+ result = self.chat(prompt, sampling_params, use_tqdm=False)
108
+ full_response = result[0].outputs[0].text
109
+
110
+ # Simulate streaming by emitting tokens immediately
111
+ if emit_callback and full_response:
112
+ # Split response into reasonable chunks (words/punctuation)
113
+ words = re.findall(r'\S+|\s+', full_response)
114
+ for word in words:
115
+ # Check for interruption before emitting each word
116
+ if interrupted_callback and interrupted_callback():
117
+ break
118
+ if emit_callback:
119
+ emit_callback(word)
120
+
121
+ return result
122
+
123
+
124
+ class OpenAIModel(LLMModel):
125
+ """
126
+ OpenAI GPT model interface.
127
+
128
+ Supports GPT-4, GPT-4o, o3, and o4 model families with proper handling
129
+ of different model requirements (reasoning effort for o3/o4 models).
130
+ """
131
+
132
+ def __init__(self, model_name: str, api_key: Optional[str] = None):
133
+ super().__init__(model_name)
134
+ self.api_key = api_key or os.getenv("OPENAI_API_KEY")
135
+ if not self.api_key:
136
+ raise ValueError("OpenAI API key not provided and OPENAI_API_KEY environment variable not set")
137
+
138
+ def _initialize_client(self):
139
+ """Initialize OpenAI client with appropriate settings."""
140
+ self._client = OpenAI(
141
+ api_key=self.api_key,
142
+ timeout=900000000,
143
+ max_retries=3,
144
+ )
145
+
146
+ def _create_completion_with_retry(self, model, messages, max_attempts=5, delay_seconds=2, **kwargs):
147
+ """
148
+ Call chat.completions.create with retry logic.
149
+
150
+ Args:
151
+ model: Model name to use
152
+ messages: List of message dictionaries
153
+ max_attempts: Maximum number of retry attempts
154
+ delay_seconds: Delay between retries
155
+ **kwargs: Additional arguments for the API call
156
+
157
+ Returns:
158
+ OpenAI ChatCompletion response
159
+
160
+ Raises:
161
+ Exception: If all retry attempts fail
162
+ """
163
+ if not self._client:
164
+ raise RuntimeError("Client not initialized")
165
+
166
+ last_exception = None
167
+ for attempt in range(max_attempts):
168
+ try:
169
+ response = self._client.chat.completions.create(
170
+ model=model,
171
+ messages=messages,
172
+ **kwargs
173
+ )
174
+ return response
175
+ except Exception as e:
176
+ last_exception = e
177
+ if attempt < max_attempts - 1:
178
+ time.sleep(delay_seconds)
179
+ else:
180
+ raise last_exception
181
+
182
+ if last_exception:
183
+ raise last_exception
184
+ return None
185
+
186
+ def _chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, use_tqdm=False):
187
+ """Implementation of chat for OpenAI models."""
188
+ extra_args = {}
189
+
190
+ # Configure parameters based on model type
191
+ if "o3" in self.model_name or "o4" in self.model_name:
192
+ # Reasoning models have special parameters
193
+ extra_args["reasoning_effort"] = "medium"
194
+ extra_args["max_completion_tokens"] = 20000
195
+ extra_args["n"] = sampling_params.n
196
+ else:
197
+ # Standard models
198
+ extra_args["max_completion_tokens"] = sampling_params.max_tokens
199
+ extra_args["n"] = sampling_params.n
200
+ extra_args["temperature"] = sampling_params.temperature
201
+ extra_args["top_p"] = sampling_params.top_p
202
+
203
+ response = self._create_completion_with_retry(
204
+ model=self.model_name,
205
+ messages=prompt,
206
+ **extra_args
207
+ )
208
+
209
+ # Create response wrapper classes
210
+ class Outputs:
211
+ def __init__(self, outputs):
212
+ self.outputs = outputs
213
+
214
+ class Text:
215
+ def __init__(self, text):
216
+ self.text = text
217
+
218
+ if hasattr(response, 'usage') and response.usage.completion_tokens > 0:
219
+ return [Outputs([Text(response.choices[i].message.content) for i in range(sampling_params.n)])]
220
+ else:
221
+ return [Outputs([Text("") for i in range(sampling_params.n)])]
222
+
223
+ def stream_chat(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
224
+ """Stream response using OpenAI's streaming API."""
225
+ self._ensure_initialized()
226
+ return self._stream_chat_impl(prompt, sampling_params, emit_callback, interrupted_callback)
227
+
228
+ def _stream_chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
229
+ """Implementation of streaming chat for OpenAI models."""
230
+ if not self._client:
231
+ raise RuntimeError("Client not initialized")
232
+
233
+ extra_args = {}
234
+
235
+ # Configure parameters based on model type
236
+ if "o3" in self.model_name or "o4" in self.model_name:
237
+ extra_args["reasoning_effort"] = "medium"
238
+ extra_args["max_completion_tokens"] = 20000
239
+ else:
240
+ extra_args["max_completion_tokens"] = sampling_params.max_tokens
241
+ extra_args["temperature"] = sampling_params.temperature
242
+ extra_args["top_p"] = sampling_params.top_p
243
+
244
+ try:
245
+ stream = self._client.chat.completions.create(
246
+ model=self.model_name,
247
+ messages=prompt,
248
+ stream=True,
249
+ **extra_args
250
+ )
251
+
252
+ full_response = ""
253
+ for chunk in stream:
254
+ # Check for interruption before processing each chunk
255
+ if interrupted_callback and interrupted_callback():
256
+ # Stop streaming immediately if interrupted
257
+ break
258
+
259
+ if chunk.choices[0].delta.content is not None:
260
+ token = chunk.choices[0].delta.content
261
+ full_response += token
262
+ if emit_callback:
263
+ emit_callback(token)
264
+
265
+ # Return in the same format as the non-streaming version
266
+ class Outputs:
267
+ def __init__(self, outputs):
268
+ self.outputs = outputs
269
+
270
+ class Text:
271
+ def __init__(self, text):
272
+ self.text = text
273
+
274
+ return [Outputs([Text(full_response)])]
275
+
276
+ except Exception as e:
277
+ raise e
278
+
279
+
280
+ class GoogleModel(LLMModel):
281
+ """
282
+ Google Gemini model interface.
283
+
284
+ Supports both standard Gemini models and code interpreter variants
285
+ through different API endpoints.
286
+ """
287
+
288
+ def __init__(self, model_name: str, api_key: Optional[str] = None):
289
+ super().__init__(model_name)
290
+ self.api_key = api_key or os.getenv("GOOGLE_API_KEY")
291
+ if not self.api_key:
292
+ raise ValueError("Google API key not provided and GOOGLE_API_KEY environment variable not set")
293
+
294
+ # Determine which provider to use based on model name
295
+ if "codeinterpreter" in model_name:
296
+ self.provider = "google-genai"
297
+ else:
298
+ self.provider = "google"
299
+
300
+ def _initialize_client(self):
301
+ """Initialize Google client based on provider type."""
302
+ if self.provider == "google-genai":
303
+ if not genai:
304
+ raise ImportError("google-genai library not installed. Install with: pip install google-genai")
305
+ self._client = genai.Client(api_key=self.api_key, http_options=types.HttpOptions(timeout=60*1000))
306
+ else:
307
+ # Use OpenAI-compatible API endpoint
308
+ self._client = OpenAI(
309
+ api_key=self.api_key,
310
+ base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
311
+ timeout=900000000,
312
+ max_retries=3,
313
+ )
314
+
315
+ def _chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, use_tqdm=False):
316
+ """Implementation of chat for Google models."""
317
+ if self.provider == "google-genai":
318
+ return self._chat_genai(prompt, sampling_params)
319
+ else:
320
+ return self._chat_openai_compatible(prompt, sampling_params)
321
+
322
+ def _chat_genai(self, prompt: List[Dict], sampling_params: SamplingParams):
323
+ """Chat implementation using Google GenAI library."""
324
+ # Convert OpenAI format to Google GenAI format
325
+ genai_contents = []
326
+ for message in prompt:
327
+ role = message["role"]
328
+ content = message["content"]
329
+
330
+ if isinstance(content, str):
331
+ genai_contents.append(
332
+ types.Content(
333
+ role=role,
334
+ parts=[types.Part(text=content)]
335
+ )
336
+ )
337
+ elif isinstance(content, list):
338
+ parts = []
339
+ for item in content:
340
+ if item["type"] == "text":
341
+ parts.append(types.Part(text=item["text"]))
342
+ elif item["type"] == "image_url":
343
+ img_url = item["image_url"]["url"]
344
+ if img_url.startswith("data:image"):
345
+ # Handle base64 encoded images
346
+ base64_data = img_url.split(",")[1]
347
+ parts.append(
348
+ types.Part(
349
+ inline_data=types.Blob(
350
+ mime_type="image/jpeg",
351
+ data=base64_data
352
+ )
353
+ )
354
+ )
355
+ else:
356
+ # Handle image URLs
357
+ parts.append(
358
+ types.Part(
359
+ file_data=types.FileData(
360
+ file_uri=img_url,
361
+ mime_type="image/jpeg"
362
+ )
363
+ )
364
+ )
365
+ if parts:
366
+ genai_contents.append(
367
+ types.Content(
368
+ role=role,
369
+ parts=parts
370
+ )
371
+ )
372
+
373
+ response = self._client.models.generate_content(
374
+ model=self.model_name.replace("-codeinterpreter", ""),
375
+ contents=genai_contents,
376
+ config=types.GenerateContentConfig(
377
+ tools=[types.Tool(
378
+ code_execution=types.ToolCodeExecution
379
+ )],
380
+ temperature=sampling_params.temperature,
381
+ max_output_tokens=sampling_params.max_tokens,
382
+ )
383
+ )
384
+
385
+ # Process response including code execution results
386
+ response_text = ""
387
+ code_execution_results = []
388
+
389
+ if response.candidates is not None:
390
+ for candidate in response.candidates:
391
+ if candidate.content is not None:
392
+ for part in candidate.content.parts:
393
+ if part.text is not None:
394
+ response_text += part.text
395
+
396
+ if part.executable_code is not None:
397
+ executable_code = part.executable_code
398
+ if executable_code.code is not None:
399
+ code_execution_results.append({
400
+ 'code': executable_code.code,
401
+ })
402
+
403
+ if part.code_execution_result is not None:
404
+ code_result = part.code_execution_result
405
+ if code_result.output is not None:
406
+ code_execution_results.append({
407
+ 'output': code_result.output,
408
+ })
409
+
410
+ # Format final response with code execution results
411
+ final_response = ""
412
+ if code_execution_results:
413
+ for result in code_execution_results:
414
+ if "code" in result:
415
+ final_response += f"Code:\n{result['code']}\n"
416
+ if "output" in result:
417
+ final_response += f"Output:\n{result['output']}\n"
418
+ final_response += response_text
419
+
420
+ class Outputs:
421
+ def __init__(self, outputs):
422
+ self.outputs = outputs
423
+
424
+ class Text:
425
+ def __init__(self, text):
426
+ self.text = text
427
+
428
+ return [Outputs([Text(final_response)])]
429
+
430
+ def _chat_openai_compatible(self, prompt: List[Dict], sampling_params: SamplingParams):
431
+ """Chat implementation using OpenAI-compatible API."""
432
+ response = self._client.chat.completions.create(
433
+ model=self.model_name,
434
+ messages=prompt,
435
+ max_completion_tokens=sampling_params.max_tokens,
436
+ n=sampling_params.n,
437
+ temperature=sampling_params.temperature,
438
+ top_p=sampling_params.top_p,
439
+ )
440
+
441
+ class Outputs:
442
+ def __init__(self, outputs):
443
+ self.outputs = outputs
444
+
445
+ class Text:
446
+ def __init__(self, text):
447
+ self.text = text
448
+
449
+ if response.usage.completion_tokens > 0:
450
+ return [Outputs([Text(response.choices[i].message.content) for i in range(sampling_params.n)])]
451
+ else:
452
+ return [Outputs([Text("") for i in range(sampling_params.n)])]
453
+
454
+ def stream_chat(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
455
+ """Stream response using Google models."""
456
+ self._ensure_initialized()
457
+ return self._stream_chat_impl(prompt, sampling_params, emit_callback, interrupted_callback)
458
+
459
+ def _stream_chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
460
+ """Implementation of streaming chat for Google models."""
461
+ if self.provider == "google-genai":
462
+ return self._stream_chat_genai(prompt, sampling_params, emit_callback, interrupted_callback)
463
+ else:
464
+ return self._stream_chat_openai_compatible(prompt, sampling_params, emit_callback, interrupted_callback)
465
+
466
+ def _stream_chat_genai(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
467
+ """Stream chat using Google GenAI - simulates streaming as API doesn't support it."""
468
+ # Google GenAI doesn't support streaming yet, so we'll get the full response and simulate streaming
469
+ result = self._chat_genai(prompt, sampling_params)
470
+ full_response = result[0].outputs[0].text
471
+
472
+ # Simulate streaming by emitting tokens immediately
473
+ if emit_callback and full_response:
474
+ # Split response into reasonable chunks (words/punctuation)
475
+ words = re.findall(r'\S+|\s+', full_response)
476
+ for word in words:
477
+ # Check for interruption before emitting each word
478
+ if interrupted_callback and interrupted_callback():
479
+ break
480
+ if emit_callback:
481
+ emit_callback(word)
482
+
483
+ return result
484
+
485
+ def _stream_chat_openai_compatible(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
486
+ """Stream chat using OpenAI-compatible Google API."""
487
+ if not self._client:
488
+ raise RuntimeError("Client not initialized")
489
+
490
+ try:
491
+ stream = self._client.chat.completions.create(
492
+ model=self.model_name,
493
+ messages=prompt,
494
+ max_completion_tokens=sampling_params.max_tokens,
495
+ temperature=sampling_params.temperature,
496
+ top_p=sampling_params.top_p,
497
+ stream=True
498
+ )
499
+
500
+ full_response = ""
501
+ for chunk in stream:
502
+ # Check for interruption before processing each chunk
503
+ if interrupted_callback and interrupted_callback():
504
+ break
505
+
506
+ if chunk.choices[0].delta.content is not None:
507
+ token = chunk.choices[0].delta.content
508
+ full_response += token
509
+ if emit_callback:
510
+ emit_callback(token)
511
+
512
+ # Return in the same format as the non-streaming version
513
+ class Outputs:
514
+ def __init__(self, outputs):
515
+ self.outputs = outputs
516
+
517
+ class Text:
518
+ def __init__(self, text):
519
+ self.text = text
520
+
521
+ return [Outputs([Text(full_response)])]
522
+
523
+ except Exception as e:
524
+ raise e
525
+
526
+
527
+ class AnthropicModel(LLMModel):
528
+ """
529
+ Anthropic Claude model interface.
530
+
531
+ Supports Claude models with proper message format conversion
532
+ and streaming capabilities.
533
+ """
534
+
535
+ def __init__(self, model_name: str, api_key: Optional[str] = None):
536
+ super().__init__(model_name)
537
+ self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
538
+ if not self.api_key:
539
+ raise ValueError("Anthropic API key not provided and ANTHROPIC_API_KEY environment variable not set")
540
+
541
+ if not anthropic:
542
+ raise ImportError("anthropic library not installed. Install with: pip install anthropic")
543
+
544
+ def _initialize_client(self):
545
+ """Initialize Anthropic client."""
546
+ self._client = anthropic.Anthropic(api_key=self.api_key)
547
+
548
+ def _convert_messages(self, prompt: List[Dict]) -> tuple:
549
+ """
550
+ Convert OpenAI format messages to Anthropic format.
551
+
552
+ Args:
553
+ prompt: List of message dictionaries in OpenAI format
554
+
555
+ Returns:
556
+ Tuple of (system_message, messages) where messages are in Anthropic format
557
+ """
558
+ system_message = ""
559
+ anthropic_messages = []
560
+
561
+ for message in prompt:
562
+ role = message["role"]
563
+ content = message["content"]
564
+
565
+ if role == "system":
566
+ system_message = content if isinstance(content, str) else content[0]["text"]
567
+ else:
568
+ # Convert role names
569
+ if role == "assistant":
570
+ anthropic_role = "assistant"
571
+ else:
572
+ anthropic_role = "user"
573
+
574
+ # Handle content format
575
+ if isinstance(content, str):
576
+ anthropic_content = content
577
+ elif isinstance(content, list):
578
+ # Handle multimodal content
579
+ anthropic_content = []
580
+ for item in content:
581
+ if item["type"] == "text":
582
+ anthropic_content.append({
583
+ "type": "text",
584
+ "text": item["text"]
585
+ })
586
+ elif item["type"] == "image_url":
587
+ img_url = item["image_url"]["url"]
588
+ if img_url.startswith("data:image"):
589
+ # Extract base64 data and media type
590
+ header, base64_data = img_url.split(",", 1)
591
+ media_type = header.split(";")[0].split(":")[1]
592
+ anthropic_content.append({
593
+ "type": "image",
594
+ "source": {
595
+ "type": "base64",
596
+ "media_type": media_type,
597
+ "data": base64_data
598
+ }
599
+ })
600
+ else:
601
+ anthropic_content = str(content)
602
+
603
+ anthropic_messages.append({
604
+ "role": anthropic_role,
605
+ "content": anthropic_content
606
+ })
607
+
608
+ return system_message, anthropic_messages
609
+
610
+ def _chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, use_tqdm=False):
611
+ """Implementation of chat for Anthropic models."""
612
+ system_message, anthropic_messages = self._convert_messages(prompt)
613
+
614
+ # Prepare API call arguments
615
+ kwargs = {
616
+ "model": self.model_name,
617
+ "messages": anthropic_messages,
618
+ "max_tokens": sampling_params.max_tokens,
619
+ "temperature": sampling_params.temperature,
620
+ "top_p": sampling_params.top_p,
621
+ }
622
+
623
+ if system_message:
624
+ kwargs["system"] = system_message
625
+
626
+ if sampling_params.stop:
627
+ kwargs["stop_sequences"] = sampling_params.stop
628
+
629
+ response = self._client.messages.create(**kwargs)
630
+
631
+ # Extract text from response
632
+ response_text = ""
633
+ for content_block in response.content:
634
+ if content_block.type == "text":
635
+ response_text += content_block.text
636
+
637
+ # Create response wrapper classes
638
+ class Outputs:
639
+ def __init__(self, outputs):
640
+ self.outputs = outputs
641
+
642
+ class Text:
643
+ def __init__(self, text):
644
+ self.text = text
645
+
646
+ return [Outputs([Text(response_text)])]
647
+
648
+ def stream_chat(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
649
+ """Stream response using Anthropic's streaming API."""
650
+ self._ensure_initialized()
651
+ return self._stream_chat_impl(prompt, sampling_params, emit_callback, interrupted_callback)
652
+
653
+ def _stream_chat_impl(self, prompt: List[Dict], sampling_params: SamplingParams, emit_callback=None, interrupted_callback=None):
654
+ """Implementation of streaming chat for Anthropic models."""
655
+ if not self._client:
656
+ raise RuntimeError("Client not initialized")
657
+
658
+ system_message, anthropic_messages = self._convert_messages(prompt)
659
+
660
+ # Prepare API call arguments
661
+ kwargs = {
662
+ "model": self.model_name,
663
+ "messages": anthropic_messages,
664
+ "max_tokens": sampling_params.max_tokens,
665
+ "temperature": sampling_params.temperature,
666
+ "top_p": sampling_params.top_p,
667
+ "stream": True,
668
+ }
669
+
670
+ if system_message:
671
+ kwargs["system"] = system_message
672
+
673
+ if sampling_params.stop:
674
+ kwargs["stop_sequences"] = sampling_params.stop
675
+
676
+ try:
677
+ full_response = ""
678
+
679
+ with self._client.messages.stream(**kwargs) as stream:
680
+ for text in stream.text_stream:
681
+ # Check for interruption before processing each text chunk
682
+ if interrupted_callback and interrupted_callback():
683
+ break
684
+
685
+ full_response += text
686
+ if emit_callback:
687
+ emit_callback(text)
688
+
689
+ # Return in the same format as the non-streaming version
690
+ class Outputs:
691
+ def __init__(self, outputs):
692
+ self.outputs = outputs
693
+
694
+ class Text:
695
+ def __init__(self, text):
696
+ self.text = text
697
+
698
+ return [Outputs([Text(full_response)])]
699
+
700
+ except Exception as e:
701
+ raise e
702
+
703
+
704
+ def get_model(model_name: str, api_key: Optional[str] = None) -> LLMModel:
705
+ """
706
+ Factory function to get the appropriate model instance.
707
+
708
+ Args:
709
+ model_name: Name of the model to instantiate
710
+ api_key: Optional API key (will use environment variable if not provided)
711
+
712
+ Returns:
713
+ LLMModel instance for the specified model
714
+
715
+ Raises:
716
+ ValueError: If the model is not supported
717
+ """
718
+ model_name_lower = model_name.lower()
719
+
720
+ if any(model_name_lower.startswith(model) for model in ["gpt", "o3", "o4"]):
721
+ return OpenAIModel(model_name, api_key)
722
+ elif "gemini" in model_name_lower:
723
+ return GoogleModel(model_name, api_key)
724
+ elif "claude" in model_name_lower:
725
+ return AnthropicModel(model_name, api_key)
726
+ else:
727
+ raise ValueError(f"Unsupported model: {model_name}")
728
+
729
+
730
+ # Import models from the registry
731
+ from .model_registry import get_available_models
732
+
733
+ # Available models - now pulled from the registry
734
+ AVAILABLE_MODELS = get_available_models()
src/pips/prompts.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt templates used by the PIPS web application.
3
+
4
+ These definitions mirror the variants maintained in ``scripts/algorithmic_eval.py``
5
+ so that the web server can operate when the broader research repository is not
6
+ installed as a Python package.
7
+ """
8
+
9
+ CHOOSE_CONSERVATIVE_COT_VS_CODE_PROMPT = """
10
+ You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.
11
+
12
+ **IMPORTANT:**
13
+ - This is a hypothetical evaluation.
14
+ - **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**
15
+ - Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.
16
+
17
+ Solution Expectations:
18
+ - You may assume standard library modules are allowed for code.
19
+ - You may NOT call external services, APIs, databases, or other LLMs.
20
+ - The code must be self-contained and executable without internet access.
21
+ - Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.
22
+
23
+ **CRITICAL GUIDANCE:**
24
+ - **Be cautious, not optimistic.**
25
+ Overestimating your capabilities will lead to choosing a method you cannot successfully complete.
26
+ - **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**
27
+ - **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.
28
+ - **Use conservative estimates.**
29
+ - If unsure between two options, **prefer lower probabilities rather than guessing high**.
30
+
31
+ Here are the self-reflection sub-questions you must answer hypothetically:
32
+
33
+ 1. **Simple Formalizability** — *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*
34
+
35
+ 2. **Straightforward Executability** — *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*
36
+
37
+ 3. **Robust Systematic Search** — *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*
38
+
39
+ 4. **Manageable State Representation** — *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*
40
+
41
+ 5. **Structured Knowledge Encoding** — *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*
42
+
43
+ 6. **Hallucination Risk Reduction** — *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*
44
+
45
+ 7. **Arithmetic and Data Processing Advantage** — *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*
46
+
47
+ 8. **Branching and Case Handling Advantage** — *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*
48
+
49
+ 9. **Algorithmic Reliability Over Heuristics** — *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*
50
+
51
+ 10. **Overall Comparative Success** — *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*
52
+
53
+ After thoroughly reasoning through each criterion:
54
+
55
+ - Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:
56
+ - Scores 1–10 correspond to the ten sub-questions above.
57
+
58
+ **Additional Instructions:**
59
+ - Explicitly reason through each criterion carefully before giving a probability.
60
+ - If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.
61
+ - Make sure to put only the list after FINAL ANSWER.
62
+ - **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**
63
+
64
+ TARGET QUESTION:
65
+ """
66
+
67
+ __all__ = ["CHOOSE_CONSERVATIVE_COT_VS_CODE_PROMPT"]
src/pips/saved_examples/movies.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "problem": "Chain of thought completed!",
3
+ "history": [
4
+ {
5
+ "role": "user",
6
+ "content": "Which of these movies is the action movie: Die Hard, The Notebook, or The Internship?",
7
+ "metadata": {
8
+ "component": "user",
9
+ "title": "User"
10
+ }
11
+ },
12
+ {
13
+ "role": "assistant",
14
+ "content": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
15
+ "metadata": {
16
+ "component": "mode_selection",
17
+ "title": "Mode Selection"
18
+ }
19
+ },
20
+ {
21
+ "role": "assistant",
22
+ "content": "Mode chosen: Chain-of-thought reasoning",
23
+ "metadata": {
24
+ "component": "mode_result",
25
+ "title": "Mode Choice"
26
+ }
27
+ },
28
+ {
29
+ "role": "assistant",
30
+ "content": "Let's analyze each movie:\n\n1. **Die Hard**: This movie is well-known for its intense sequences, gunfights, and thrilling plot involving a police officer fighting terrorists in a skyscraper. It is widely recognized as a classic action movie.\n\n2. **The Notebook**: This film is a romantic drama about a love story between two people. It is not considered an action movie.\n\n3. **The Internship**: This is a comedy about two men who intern at Google. It is not an action movie.\n\nBased on the descriptions, the action movie among the three is \"Die Hard.\"\n\nFINAL ANSWER: Die Hard",
31
+ "metadata": {
32
+ "component": "solver",
33
+ "title": "\ud83e\udde0 Solver (iteration 0) \u00b7 OpenAIModel"
34
+ }
35
+ },
36
+ {
37
+ "role": "assistant",
38
+ "content": "**Final Answer:** FINAL ANSWER: Die Hard\n\n**Method:** Chain-of-thought reasoning",
39
+ "metadata": {
40
+ "component": "summary",
41
+ "title": "Summary"
42
+ }
43
+ }
44
+ ],
45
+ "symbols": null,
46
+ "code": "",
47
+ "status": "\u2705 Completed (chain-of-thought).",
48
+ "method": "Chain-of-thought reasoning",
49
+ "decision": {
50
+ "use_code": false,
51
+ "scores": [
52
+ 0.2,
53
+ 0.1,
54
+ 0.05,
55
+ 0.5,
56
+ 0.2,
57
+ 0.2,
58
+ 0.05,
59
+ 0.1,
60
+ 0.1,
61
+ 0.1
62
+ ],
63
+ "average_score": 0.16,
64
+ "raw_response": "Let's reason through each criterion in the context of the target question:\n\n1. **Simple Formalizability** \n The question asks to identify which movie is an action movie from a list. If I had a database or mapping of movie genres, this could be formalized. However, with only standard library and no external data, I would need to encode the genre knowledge myself. This is not a simple, deterministic code task unless the knowledge is hardcoded. Thus, the probability is low.\n\n2. **Straightforward Executability** \n Any code I write would need to include the genre information for these movies. Unless I hardcode the mapping, the code would not execute correctly. Even with hardcoding, there's a risk of error in encoding the correct genres. So, the probability is low.\n\n3. **Robust Systematic Search** \n Systematic search (e.g., brute-force) is not applicable unless I have a dataset to search. With only standard library and no external data, systematic search is not possible. Probability is very low.\n\n4. **Manageable State Representation** \n The state (movie names and their genres) is simple if hardcoded, but the risk is in accurate knowledge encoding. Still, representing the state itself is manageable. Probability is moderate.\n\n5. **Structured Knowledge Encoding** \n The required knowledge (which movie is an action movie) is not in the standard library and must be encoded manually. This is not \"neatly\" encoded unless I already know the genres and hardcode them. Probability is low.\n\n6. **Hallucination Risk Reduction** \n Code would only be as reliable as the knowledge I encode. If I make a mistake in the mapping, code could hallucinate just as much as chain-of-thought. Probability is low.\n\n7. **Arithmetic and Data Processing Advantage** \n There is no arithmetic or data processing advantage here; the task is about factual knowledge. Probability is very low.\n\n8. **Branching and Case Handling Advantage** \n There are only three options, so branching is trivial. No significant advantage for code. Probability is low.\n\n9. **Algorithmic Reliability Over Heuristics** \n The problem is not algorithmic; it's factual. Code is only as reliable as the hardcoded knowledge. Probability is low.\n\n10. **Overall Comparative Success** \n Chain-of-thought reasoning (using general knowledge) is likely more reliable than code, unless the code is perfectly hardcoded. Given the risk of error in encoding, code is less reliable overall. Probability is low.\n\nNow, assigning conservative probabilities:\n\n1. 0.2 \n2. 0.1 \n3. 0.05 \n4. 0.5 \n5. 0.2 \n6. 0.2 \n7. 0.05 \n8. 0.1 \n9. 0.1 \n10. 0.1 \n\nFINAL ANSWER: [0.2, 0.1, 0.05, 0.5, 0.2, 0.2, 0.05, 0.1, 0.1, 0.1]",
65
+ "prompt": [
66
+ {
67
+ "role": "user",
68
+ "content": [
69
+ {
70
+ "type": "text",
71
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
72
+ },
73
+ {
74
+ "type": "text",
75
+ "text": "TARGET QUESTION:\nWhich of these movies is the action movie: Die Hard, The Notebook, or The Internship?"
76
+ }
77
+ ]
78
+ }
79
+ ],
80
+ "error": null
81
+ },
82
+ "steps": [
83
+ {
84
+ "step": "mode_selection",
85
+ "message": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
86
+ "iteration": null,
87
+ "prompt_details": {
88
+ "description": "Choosing between chain-of-thought and iterative coding",
89
+ "conversation": [
90
+ {
91
+ "role": "user",
92
+ "content": [
93
+ {
94
+ "type": "text",
95
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
96
+ },
97
+ {
98
+ "type": "text",
99
+ "text": "TARGET QUESTION:\nWhich of these movies is the action movie: Die Hard, The Notebook, or The Internship?"
100
+ }
101
+ ]
102
+ }
103
+ ]
104
+ }
105
+ },
106
+ {
107
+ "step": "mode_selection",
108
+ "message": "Average code suitability score: 0.16. Proceeding with chain-of-thought reasoning.",
109
+ "iteration": null,
110
+ "prompt_details": {
111
+ "description": "Choosing between chain-of-thought and iterative coding",
112
+ "conversation": [
113
+ {
114
+ "role": "user",
115
+ "content": [
116
+ {
117
+ "type": "text",
118
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
119
+ },
120
+ {
121
+ "type": "text",
122
+ "text": "TARGET QUESTION:\nWhich of these movies is the action movie: Die Hard, The Notebook, or The Internship?"
123
+ }
124
+ ]
125
+ }
126
+ ],
127
+ "raw_response": "Let's reason through each criterion in the context of the target question:\n\n1. **Simple Formalizability** \n The question asks to identify which movie is an action movie from a list. If I had a database or mapping of movie genres, this could be formalized. However, with only standard library and no external data, I would need to encode the genre knowledge myself. This is not a simple, deterministic code task unless the knowledge is hardcoded. Thus, the probability is low.\n\n2. **Straightforward Executability** \n Any code I write would need to include the genre information for these movies. Unless I hardcode the mapping, the code would not execute correctly. Even with hardcoding, there's a risk of error in encoding the correct genres. So, the probability is low.\n\n3. **Robust Systematic Search** \n Systematic search (e.g., brute-force) is not applicable unless I have a dataset to search. With only standard library and no external data, systematic search is not possible. Probability is very low.\n\n4. **Manageable State Representation** \n The state (movie names and their genres) is simple if hardcoded, but the risk is in accurate knowledge encoding. Still, representing the state itself is manageable. Probability is moderate.\n\n5. **Structured Knowledge Encoding** \n The required knowledge (which movie is an action movie) is not in the standard library and must be encoded manually. This is not \"neatly\" encoded unless I already know the genres and hardcode them. Probability is low.\n\n6. **Hallucination Risk Reduction** \n Code would only be as reliable as the knowledge I encode. If I make a mistake in the mapping, code could hallucinate just as much as chain-of-thought. Probability is low.\n\n7. **Arithmetic and Data Processing Advantage** \n There is no arithmetic or data processing advantage here; the task is about factual knowledge. Probability is very low.\n\n8. **Branching and Case Handling Advantage** \n There are only three options, so branching is trivial. No significant advantage for code. Probability is low.\n\n9. **Algorithmic Reliability Over Heuristics** \n The problem is not algorithmic; it's factual. Code is only as reliable as the hardcoded knowledge. Probability is low.\n\n10. **Overall Comparative Success** \n Chain-of-thought reasoning (using general knowledge) is likely more reliable than code, unless the code is perfectly hardcoded. Given the risk of error in encoding, code is less reliable overall. Probability is low.\n\nNow, assigning conservative probabilities:\n\n1. 0.2 \n2. 0.1 \n3. 0.05 \n4. 0.5 \n5. 0.2 \n6. 0.2 \n7. 0.05 \n8. 0.1 \n9. 0.1 \n10. 0.1 \n\nFINAL ANSWER: [0.2, 0.1, 0.05, 0.5, 0.2, 0.2, 0.05, 0.1, 0.1, 0.1]",
128
+ "error": null
129
+ }
130
+ },
131
+ {
132
+ "step": "reasoning",
133
+ "message": "Thinking step-by-step...",
134
+ "iteration": null,
135
+ "prompt_details": {
136
+ "description": "Chain of thought reasoning",
137
+ "conversation": [
138
+ {
139
+ "role": "user",
140
+ "content": "Question: Which of these movies is the action movie: Die Hard, The Notebook, or The Internship?\nAnswer step-by-step and finish with 'FINAL ANSWER:'."
141
+ }
142
+ ]
143
+ }
144
+ },
145
+ {
146
+ "step": "finished",
147
+ "message": "Chain of thought completed!",
148
+ "iteration": null,
149
+ "prompt_details": null
150
+ }
151
+ ],
152
+ "timestamp": 1760569049.2332404
153
+ }
src/pips/saved_examples/time_arithmetic.json ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "problem": "Solution completed successfully!",
3
+ "history": [
4
+ {
5
+ "role": "user",
6
+ "content": "Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...]).",
7
+ "metadata": {
8
+ "component": "user",
9
+ "title": "User"
10
+ }
11
+ },
12
+ {
13
+ "role": "assistant",
14
+ "content": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
15
+ "metadata": {
16
+ "component": "mode_selection",
17
+ "title": "Mode Selection"
18
+ }
19
+ },
20
+ {
21
+ "role": "assistant",
22
+ "content": "Mode chosen: Iterative coding",
23
+ "metadata": {
24
+ "component": "mode_result",
25
+ "title": "Mode Choice"
26
+ }
27
+ },
28
+ {
29
+ "role": "assistant",
30
+ "content": "Let's break down the problem step by step and extract all relevant information in a structured JSON format.\n\n## Step 1: Extracting Information\n\n### Question 1\n- Current date: 09-13-2011\n- Find the date 17 months before now.\n- Output format: mm/dd/yyyy\n- Let the answer be X/Y/Z, where X, Y, Z are the month, day, year of the answer.\n\n### Question 2\n- X' = X + 10\n- Today is Sunday.\n- Parents arrive in X' days.\n- What day of the week do they arrive? (P)\n\n### Question 3\n- Y' = Y + 16\n- P' = day of the week 45 days after P\n- On P' morning, told package was out for delivery Y' days ago.\n- What day of the week was the package out for delivery? (Q)\n\n### Question 4\n- Z' = Z - 1984\n- Q' = day of the week 75 days after Q\n- On Q' before noon, received text: deadline in exactly Z' hours.\n- On what day(s) of the week could the deadline be? (ordered, alphabetically)\n\n## Step 2: JSON Representation\n\n```json\n{\n \"question1\": {\n \"current_date\": \"09-13-2011\",\n \"months_before\": 17,\n \"output_format\": \"mm/dd/yyyy\"\n },\n \"question2\": {\n \"today\": \"Sunday\",\n \"days_until_arrival\": \"X_prime\",\n \"output\": \"day_of_week\"\n },\n \"question3\": {\n \"days_ago\": \"Y_prime\",\n \"reference_day\": \"P_prime\",\n \"output\": \"day_of_week\"\n },\n \"question4\": {\n \"hours_until_deadline\": \"Z_prime\",\n \"reference_day\": \"Q_prime\",\n \"output\": \"alphabetical_list_of_days\"\n }\n}\n```\n\n## Step 3: Plan for Python Code\n\n1. **Question 1**: Subtract 17 months from 09-13-2011, get X, Y, Z.\n2. **Question 2**: X' = X + 10. Starting from Sunday, add X' days, get P.\n3. **Question 3**: Y' = Y + 16. P' = 45 days after P. On P', package was out for delivery Y' days ago. Find the day of the week Q.\n4. **Question 4**: Z' = Z - 1984. Q' = 75 days after Q. On Q', deadline is in Z' hours. Find all possible days of the week the deadline could fall on, output as an alphabetically ordered list.\n\nNow, let's write the code to solve the problem step by step.\n\n```python\ndef solve(symbols):\n from datetime import datetime, timedelta\n import calendar\n\n # Helper to get day of week string\n def day_of_week(dt):\n return calendar.day_name[dt.weekday()]\n\n # Helper to get day of week from string and offset\n def day_of_week_from_name(name, offset):\n days = list(calendar.day_name)\n idx = days.index(name)\n return days[(idx + offset) % 7]\n\n # Question 1\n current_date = datetime.strptime(symbols['question1']['current_date'], \"%m-%d-%Y\")\n months_before = symbols['question1']['months_before']\n # Subtract months\n year = current_date.year\n month = current_date.month\n day = current_date.day\n # Calculate new month and year\n total_months = year * 12 + month - 1 # months since year 0\n new_total_months = total_months - months_before\n new_year = new_total_months // 12\n new_month = new_total_months % 12 + 1\n # Handle day overflow (e.g., 31st in a month with 30 days)\n try:\n new_date = datetime(new_year, new_month, day)\n except ValueError:\n # Use last day of the month\n last_day = calendar.monthrange(new_year, new_month)[1]\n new_date = datetime(new_year, new_month, last_day)\n X = new_date.month\n Y = new_date.day\n Z = new_date.year\n\n # Question 2\n X_prime = X + 10\n today_name = symbols['question2']['today']\n # Find day of week after X' days\n P = day_of_week_from_name(today_name, X_prime)\n\n # Question 3\n Y_prime = Y + 16\n # P' is 45 days after P\n # To get P's date, let's pick an arbitrary date that is a 'P' (since only the day of week matters)\n # Let's use 2023-01-01 as a Sunday, then find the next P\n base_date = datetime(2023, 1, 1) # This is a Sunday\n days_to_P = (list(calendar.day_name).index(P) - base_date.weekday()) % 7\n P_date = base_date + timedelta(days=days_to_P)\n P_prime_date = P_date + timedelta(days=45)\n P_prime = day_of_week(P_prime_date)\n # On P' morning, package was out for delivery Y' days ago\n Q_date = P_prime_date - timedelta(days=Y_prime)\n Q = day_of_week(Q_date)\n\n # Question 4\n Z_prime = Z - 1984\n # Q' is 75 days after Q\n # Find a date that is Q, then add 75 days\n days_to_Q = (list(calendar.day_name).index(Q) - base_date.weekday()) % 7\n Q_date_base = base_date + timedelta(days=days_to_Q)\n Q_prime_date = Q_date_base + timedelta(days=75)\n Q_prime = day_of_week(Q_prime_date)\n # On Q' before noon, deadline is in Z' hours\n # Z' hours from before noon (say, 11:59am) on Q'\n # Find all possible days of the week the deadline could fall on\n # Z' hours may not be a whole number of days, so the deadline could be on two days\n # Let's assume \"before noon\" is 11:59am\n Q_prime_datetime = datetime(2023, 1, 1, 11, 59) # 2023-01-01 is a Sunday\n days_to_Q_prime = (list(calendar.day_name).index(Q_prime) - Q_prime_datetime.weekday()) % 7\n Q_prime_datetime = Q_prime_datetime + timedelta(days=days_to_Q_prime)\n deadline_datetime = Q_prime_datetime + timedelta(hours=Z_prime)\n # The deadline could be on the day of Q_prime or the next day if it crosses midnight\n days = set()\n days.add(calendar.day_name[Q_prime_datetime.weekday()])\n if deadline_datetime.date() != Q_prime_datetime.date():\n days.add(calendar.day_name[deadline_datetime.weekday()])\n answer_list = sorted(list(days))\n\n return answer_list\n```\n",
31
+ "metadata": {
32
+ "component": "solver",
33
+ "title": "\ud83e\udde0 Solver (iteration 0) \u00b7 OpenAIModel"
34
+ }
35
+ },
36
+ {
37
+ "role": "assistant",
38
+ "content": "1. **Incorrect Handling of \"Before Noon\" Ambiguity:** The code assumes \"before noon\" is always 11:59am, which is arbitrary. The actual range for \"before noon\" is from 12:00am to 11:59am, so the deadline could fall on up to three days (the day before, the same day, or the next day), depending on the value of Z'. The code only considers the current and next day, potentially missing valid days.\n\n2. **Arbitrary Base Date for Day-of-Week Calculations:** The code uses January 1, 2023 (a Sunday) as a base date for all day-of-week calculations. While this works for relative day-of-week math, it is not robust if the weekday mapping changes or if the base date is not Sunday. This could lead to errors if the base date is changed or misunderstood.\n\n3. **Hardcoded Year in Q' Calculations:** The code uses 2023 as the year for Q' calculations, which is unrelated to the actual years involved in the problem. This is acceptable for day-of-week math but is not generalizable or clearly justified.\n\n4. **No Input Validation:** The code does not validate the input symbols or check for missing or malformed data, which could lead to runtime errors if the input format changes.\n\n5. **Unnecessary Complexity in Date Calculations:** The code overcomplicates the process by creating arbitrary dates to match days of the week, rather than simply using modular arithmetic on the weekday indices.\n\n6. **No Explanation or Comments for Key Steps:** Some steps, such as why 2023-01-01 is chosen or how the day overflow is handled, lack clear comments, making the logic harder to follow and verify.\n\nDespite these issues, the code produces a plausible answer for the given input, but the ambiguity in \"before noon\" and the arbitrary choices in date handling could lead to incorrect or incomplete results in other scenarios.",
39
+ "metadata": {
40
+ "component": "critic",
41
+ "title": "\ud83e\uddfe Critic (iteration 1) \u00b7 AI Code Reviewer"
42
+ }
43
+ },
44
+ {
45
+ "role": "assistant",
46
+ "content": "Let's address the issues:\n\n### Issues to Fix\n\n1. **\"Before Noon\" Ambiguity:** The code should consider that \"before noon\" could be any time from 12:00am to 11:59am, so the deadline could fall on the same day, the next day, or even the previous day, depending on Z' (if Z' >= 12, the deadline could be on the previous day).\n2. **Arbitrary Base Date:** Instead of using arbitrary dates, use modular arithmetic for day-of-week calculations.\n3. **Unnecessary Date Construction:** For day-of-week math, use indices and modular arithmetic.\n4. **Clear Comments and Simpler Logic:** Add clear comments and use simpler, more robust logic.\n\n### Plan\n\n- For all day-of-week calculations, use indices (0=Monday, ..., 6=Sunday).\n- For \"before noon\", consider the full range (12:00am to 11:59am). The deadline could be on:\n - The same day if Z' < 12\n - The next day if Z' >= 12 and < 36, etc.\n - The previous day if Z' >= 24 (if before noon, and deadline is in 24+ hours, it could be the day before).\n- For the deadline, consider all possible days of the week the deadline could fall on, given the range of possible \"before noon\" times.\n\n### Revised Code\n\n```python\ndef solve(symbols):\n import calendar\n\n # Helper: get weekday index from name\n def weekday_index(name):\n return list(calendar.day_name).index(name)\n\n # Helper: get weekday name from index\n def weekday_name(idx):\n return list(calendar.day_name)[idx % 7]\n\n # Question 1: Subtract 17 months from 09-13-2011\n m, d, y = 9, 13, 2011\n months_before = 17\n total_months = y * 12 + m - 1\n new_total_months = total_months - months_before\n new_y = new_total_months // 12\n new_m = new_total_months % 12 + 1\n # Handle day overflow\n import calendar\n last_day = calendar.monthrange(new_y, new_m)[1]\n new_d = min(d, last_day)\n X, Y, Z = new_m, new_d, new_y\n\n # Question 2: X' = X + 10, today is Sunday\n X_prime = X + 10\n today_idx = weekday_index(\"Sunday\")\n P_idx = (today_idx + X_prime) % 7\n P = weekday_name(P_idx)\n\n # Question 3: Y' = Y + 16, P' = 45 days after P\n Y_prime = Y + 16\n P_prime_idx = (P_idx + 45) % 7\n P_prime = weekday_name(P_prime_idx)\n # On P' morning, package was out for delivery Y' days ago\n Q_idx = (P_prime_idx - (Y_prime % 7)) % 7\n Q = weekday_name(Q_idx)\n\n # Question 4: Z' = Z - 1984, Q' = 75 days after Q\n Z_prime = Z - 1984\n Q_prime_idx = (Q_idx + 75) % 7\n Q_prime = weekday_name(Q_prime_idx)\n\n # On Q' before noon, deadline is in exactly Z' hours\n # \"Before noon\" is any time from 0:00 to 11:59, so the deadline could be on:\n # - Q' if Z' < 12\n # - Q'+1 if 12 <= Z' < 36\n # - Q'+2 if 36 <= Z' < 60, etc.\n # But since \"before noon\" could be any time in that range, the deadline could fall on up to two days:\n # - If Z' is not a multiple of 24, the deadline could be on Q' or Q'+1\n # - If Z' is a multiple of 24, the deadline could be on Q' or Q'+1 (since 0:00 + 24h = next day 0:00)\n # So, for all t in [0, 12), deadline is Q'+(Z'//24)\n # for t in [0, 12), deadline is Q'+(Z'//24) if Z'%24 < 12, else Q'+(Z'//24)+1\n # But since \"before noon\" is a range, the deadline could be on Q'+(Z'//24) or Q'+(Z'//24)+1\n\n # Let's compute all possible days:\n days = set()\n for hour in range(0, 12): # 0:00 to 11:59\n deadline_offset = (Z_prime + hour) // 24\n day_idx = (Q_prime_idx + deadline_offset) % 7\n days.add(weekday_name(day_idx))\n answer_list = sorted(days)\n\n return answer_list\n```\nThis code now robustly handles the \"before noon\" ambiguity and uses modular arithmetic for all day-of-week calculations.",
47
+ "metadata": {
48
+ "component": "solver",
49
+ "title": "\ud83e\udde0 Solver (iteration 1) \u00b7 OpenAIModel"
50
+ }
51
+ },
52
+ {
53
+ "role": "assistant",
54
+ "content": "1. **Correctness of Date Calculation for Question 1:** \n The code calculates the date 17 months before 09-13-2011 correctly, resulting in 04/13/2010. It then assigns X, Y, Z as the month, day, and year, respectively. This is correct.\n\n2. **Correctness of Day-of-Week Calculations:** \n The code correctly computes the day of the week for each step using modular arithmetic and the calendar module. The logic for adding days and wrapping around the week is sound.\n\n3. **Handling of \"Before Noon\" Range in Question 4:** \n The code iterates over all possible hours from 0 to 11 (representing \"before noon\") and computes the possible deadline days, collecting all unique results. This approach is correct and ensures all possible days are considered.\n\n4. **Output Format:** \n The code returns the answer as a sorted list of days, as required by the question.\n\n5. **No Unused or Incorrect Returns:** \n The code does not return None and does not print instead of returning.\n\n6. **No Simpler Solution:** \n The code is already quite direct and clear for the problem's requirements.\n\n**Summary:** \nThere are no correctness or logic issues in the code. The code is input-dependent, uses the extracted symbols, and returns the answer in the required format. The output of `['Monday']` is correct for the given input. No improvements are necessary.",
55
+ "metadata": {
56
+ "component": "critic",
57
+ "title": "\ud83e\uddfe Critic (iteration 2) \u00b7 AI Code Reviewer"
58
+ }
59
+ },
60
+ {
61
+ "role": "assistant",
62
+ "content": "FINISHED",
63
+ "metadata": {
64
+ "component": "solver",
65
+ "title": "\ud83e\udde0 Solver (iteration 2) \u00b7 OpenAIModel"
66
+ }
67
+ },
68
+ {
69
+ "role": "assistant",
70
+ "content": "**Final Answer:** FINAL ANSWER: ['Monday']\n\n**Method:** Iterative coding",
71
+ "metadata": {
72
+ "component": "summary",
73
+ "title": "Summary"
74
+ }
75
+ }
76
+ ],
77
+ "symbols": {
78
+ "question1": {
79
+ "current_date": "09-13-2011",
80
+ "months_before": 17,
81
+ "output_format": "mm/dd/yyyy"
82
+ },
83
+ "question2": {
84
+ "today": "Sunday",
85
+ "days_until_arrival": "X_prime",
86
+ "output": "day_of_week"
87
+ },
88
+ "question3": {
89
+ "days_ago": "Y_prime",
90
+ "reference_day": "P_prime",
91
+ "output": "day_of_week"
92
+ },
93
+ "question4": {
94
+ "hours_until_deadline": "Z_prime",
95
+ "reference_day": "Q_prime",
96
+ "output": "alphabetical_list_of_days"
97
+ }
98
+ },
99
+ "code": "\ndef solve(symbols):\n import calendar\n\n # Helper: get weekday index from name\n def weekday_index(name):\n return list(calendar.day_name).index(name)\n\n # Helper: get weekday name from index\n def weekday_name(idx):\n return list(calendar.day_name)[idx % 7]\n\n # Question 1: Subtract 17 months from 09-13-2011\n m, d, y = 9, 13, 2011\n months_before = 17\n total_months = y * 12 + m - 1\n new_total_months = total_months - months_before\n new_y = new_total_months // 12\n new_m = new_total_months % 12 + 1\n # Handle day overflow\n import calendar\n last_day = calendar.monthrange(new_y, new_m)[1]\n new_d = min(d, last_day)\n X, Y, Z = new_m, new_d, new_y\n\n # Question 2: X' = X + 10, today is Sunday\n X_prime = X + 10\n today_idx = weekday_index(\"Sunday\")\n P_idx = (today_idx + X_prime) % 7\n P = weekday_name(P_idx)\n\n # Question 3: Y' = Y + 16, P' = 45 days after P\n Y_prime = Y + 16\n P_prime_idx = (P_idx + 45) % 7\n P_prime = weekday_name(P_prime_idx)\n # On P' morning, package was out for delivery Y' days ago\n Q_idx = (P_prime_idx - (Y_prime % 7)) % 7\n Q = weekday_name(Q_idx)\n\n # Question 4: Z' = Z - 1984, Q' = 75 days after Q\n Z_prime = Z - 1984\n Q_prime_idx = (Q_idx + 75) % 7\n Q_prime = weekday_name(Q_prime_idx)\n\n # On Q' before noon, deadline is in exactly Z' hours\n # \"Before noon\" is any time from 0:00 to 11:59, so the deadline could be on:\n # - Q' if Z' < 12\n # - Q'+1 if 12 <= Z' < 36\n # - Q'+2 if 36 <= Z' < 60, etc.\n # But since \"before noon\" could be any time in that range, the deadline could fall on up to two days:\n # - If Z' is not a multiple of 24, the deadline could be on Q' or Q'+1\n # - If Z' is a multiple of 24, the deadline could be on Q' or Q'+1 (since 0:00 + 24h = next day 0:00)\n # So, for all t in [0, 12), deadline is Q'+(Z'//24)\n # for t in [0, 12), deadline is Q'+(Z'//24) if Z'%24 < 12, else Q'+(Z'//24)+1\n # But since \"before noon\" is a range, the deadline could be on Q'+(Z'//24) or Q'+(Z'//24)+1\n\n # Let's compute all possible days:\n days = set()\n for hour in range(0, 12): # 0:00 to 11:59\n deadline_offset = (Z_prime + hour) // 24\n day_idx = (Q_prime_idx + deadline_offset) % 7\n days.add(weekday_name(day_idx))\n answer_list = sorted(days)\n\n return answer_list\n",
100
+ "status": "\u2705 Completed (iterative coding).",
101
+ "method": "Iterative coding",
102
+ "decision": {
103
+ "use_code": true,
104
+ "scores": [
105
+ 0.6,
106
+ 0.5,
107
+ 0.7,
108
+ 0.7,
109
+ 0.9,
110
+ 0.8,
111
+ 0.95,
112
+ 0.8,
113
+ 0.85,
114
+ 0.75
115
+ ],
116
+ "average_score": 0.755,
117
+ "raw_response": "Let's reason through each criterion for the given TARGET QUESTION:\n\n**1. Simple Formalizability** \nThe question is a multi-step, chained logic problem involving date arithmetic, day-of-week calculations, and variable substitutions. Each step is deterministic and can be formalized, but the chaining and variable passing increase complexity. While each sub-question is formalizable, the risk of misassigning variables or miscalculating offsets is nontrivial. \n**Probability:** 0.6\n\n**2. Straightforward Executability** \nWriting code for date arithmetic and day-of-week calculations is feasible using Python's `datetime` module. However, the multi-step variable passing and the need to parse and format dates precisely (e.g., mm/dd/yyyy) introduce room for subtle bugs. A first attempt may miss a variable or misinterpret a step. \n**Probability:** 0.5\n\n**3. Robust Systematic Search** \nThe problem does not require search per se, but systematic step-by-step calculation. Code can reliably perform these steps if implemented correctly, but the risk is in mis-sequencing or misapplying the variable transformations. \n**Probability:** 0.7\n\n**4. Manageable State Representation** \nAll variables (dates, days, offsets) can be represented as variables in code. However, the chain of dependencies and variable renaming (X, X', etc.) could lead to confusion or misassignment. Still, code can track these if carefully written. \n**Probability:** 0.7\n\n**5. Structured Knowledge Encoding** \nAll required knowledge (date arithmetic, day-of-week calculation, variable manipulation) can be encoded in code using standard libraries. No external or intuitive knowledge is needed. \n**Probability:** 0.9\n\n**6. Hallucination Risk Reduction** \nCode execution will follow the logic as written, reducing the risk of hallucinating steps or making unwarranted assumptions. However, if the code is written with a misinterpretation, the error will propagate. Still, code is less likely to hallucinate than chain-of-thought. \n**Probability:** 0.8\n\n**7. Arithmetic and Data Processing Advantage** \nDate arithmetic and day-of-week calculations are error-prone for humans but trivial for code using the right libraries. Code has a clear advantage here. \n**Probability:** 0.95\n\n**8. Branching and Case Handling Advantage** \nThere are not many branches, but there are multiple chained steps and variable substitutions. Code can systematically handle these, but the main risk is in mis-sequencing, not in missing branches. \n**Probability:** 0.8\n\n**9. Algorithmic Reliability Over Heuristics** \nA deterministic algorithm in code is likely to be more reliable than human reasoning for this type of chained, arithmetic-heavy problem, provided the logic is implemented correctly. \n**Probability:** 0.85\n\n**10. Overall Comparative Success** \nConsidering all factors, code is likely to be more reliable than chain-of-thought for this problem, but the risk of misassigning variables or mis-sequencing steps is non-negligible. \n**Probability:** 0.75\n\n---\n\nFINAL ANSWER:\n[0.6, 0.5, 0.7, 0.7, 0.9, 0.8, 0.95, 0.8, 0.85, 0.75]",
118
+ "prompt": [
119
+ {
120
+ "role": "user",
121
+ "content": [
122
+ {
123
+ "type": "text",
124
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
125
+ },
126
+ {
127
+ "type": "text",
128
+ "text": "TARGET QUESTION:\nLet the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
129
+ }
130
+ ]
131
+ }
132
+ ],
133
+ "error": null
134
+ },
135
+ "steps": [
136
+ {
137
+ "step": "mode_selection",
138
+ "message": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
139
+ "iteration": null,
140
+ "prompt_details": {
141
+ "description": "Choosing between chain-of-thought and iterative coding",
142
+ "conversation": [
143
+ {
144
+ "role": "user",
145
+ "content": [
146
+ {
147
+ "type": "text",
148
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
149
+ },
150
+ {
151
+ "type": "text",
152
+ "text": "TARGET QUESTION:\nLet the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
153
+ }
154
+ ]
155
+ }
156
+ ]
157
+ }
158
+ },
159
+ {
160
+ "step": "mode_selection",
161
+ "message": "Average code suitability score: 0.76. Proceeding with iterative code generation.",
162
+ "iteration": null,
163
+ "prompt_details": {
164
+ "description": "Choosing between chain-of-thought and iterative coding",
165
+ "conversation": [
166
+ {
167
+ "role": "user",
168
+ "content": [
169
+ {
170
+ "type": "text",
171
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
172
+ },
173
+ {
174
+ "type": "text",
175
+ "text": "TARGET QUESTION:\nLet the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
176
+ }
177
+ ]
178
+ }
179
+ ],
180
+ "raw_response": "Let's reason through each criterion for the given TARGET QUESTION:\n\n**1. Simple Formalizability** \nThe question is a multi-step, chained logic problem involving date arithmetic, day-of-week calculations, and variable substitutions. Each step is deterministic and can be formalized, but the chaining and variable passing increase complexity. While each sub-question is formalizable, the risk of misassigning variables or miscalculating offsets is nontrivial. \n**Probability:** 0.6\n\n**2. Straightforward Executability** \nWriting code for date arithmetic and day-of-week calculations is feasible using Python's `datetime` module. However, the multi-step variable passing and the need to parse and format dates precisely (e.g., mm/dd/yyyy) introduce room for subtle bugs. A first attempt may miss a variable or misinterpret a step. \n**Probability:** 0.5\n\n**3. Robust Systematic Search** \nThe problem does not require search per se, but systematic step-by-step calculation. Code can reliably perform these steps if implemented correctly, but the risk is in mis-sequencing or misapplying the variable transformations. \n**Probability:** 0.7\n\n**4. Manageable State Representation** \nAll variables (dates, days, offsets) can be represented as variables in code. However, the chain of dependencies and variable renaming (X, X', etc.) could lead to confusion or misassignment. Still, code can track these if carefully written. \n**Probability:** 0.7\n\n**5. Structured Knowledge Encoding** \nAll required knowledge (date arithmetic, day-of-week calculation, variable manipulation) can be encoded in code using standard libraries. No external or intuitive knowledge is needed. \n**Probability:** 0.9\n\n**6. Hallucination Risk Reduction** \nCode execution will follow the logic as written, reducing the risk of hallucinating steps or making unwarranted assumptions. However, if the code is written with a misinterpretation, the error will propagate. Still, code is less likely to hallucinate than chain-of-thought. \n**Probability:** 0.8\n\n**7. Arithmetic and Data Processing Advantage** \nDate arithmetic and day-of-week calculations are error-prone for humans but trivial for code using the right libraries. Code has a clear advantage here. \n**Probability:** 0.95\n\n**8. Branching and Case Handling Advantage** \nThere are not many branches, but there are multiple chained steps and variable substitutions. Code can systematically handle these, but the main risk is in mis-sequencing, not in missing branches. \n**Probability:** 0.8\n\n**9. Algorithmic Reliability Over Heuristics** \nA deterministic algorithm in code is likely to be more reliable than human reasoning for this type of chained, arithmetic-heavy problem, provided the logic is implemented correctly. \n**Probability:** 0.85\n\n**10. Overall Comparative Success** \nConsidering all factors, code is likely to be more reliable than chain-of-thought for this problem, but the risk of misassigning variables or mis-sequencing steps is non-negligible. \n**Probability:** 0.75\n\n---\n\nFINAL ANSWER:\n[0.6, 0.5, 0.7, 0.7, 0.9, 0.8, 0.95, 0.8, 0.85, 0.75]",
181
+ "error": null
182
+ }
183
+ },
184
+ {
185
+ "step": "initial_generation",
186
+ "message": "Generating first solution\u2026",
187
+ "iteration": null,
188
+ "prompt_details": {
189
+ "description": "Initial solution generation",
190
+ "conversation": [
191
+ {
192
+ "role": "system",
193
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
194
+ },
195
+ {
196
+ "role": "user",
197
+ "content": "Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
198
+ }
199
+ ]
200
+ }
201
+ },
202
+ {
203
+ "step": "code_checking",
204
+ "message": "Running code critic (iteration 1)...",
205
+ "iteration": 1,
206
+ "prompt_details": {
207
+ "description": "Code quality analysis and critique (iteration 1)",
208
+ "conversation": [
209
+ {
210
+ "role": "system",
211
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
212
+ },
213
+ {
214
+ "role": "user",
215
+ "content": "Question: Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...]).\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n{\n \"question1\": {\n \"current_date\": \"09-13-2011\",\n \"months_before\": 17,\n \"output_format\": \"mm/dd/yyyy\"\n },\n \"question2\": {\n \"today\": \"Sunday\",\n \"days_until_arrival\": \"X_prime\",\n \"output\": \"day_of_week\"\n },\n \"question3\": {\n \"days_ago\": \"Y_prime\",\n \"reference_day\": \"P_prime\",\n \"output\": \"day_of_week\"\n },\n \"question4\": {\n \"hours_until_deadline\": \"Z_prime\",\n \"reference_day\": \"Q_prime\",\n \"output\": \"alphabetical_list_of_days\"\n }\n}\n```\n\n```python\n\ndef solve(symbols):\n from datetime import datetime, timedelta\n import calendar\n\n # Helper to get day of week string\n def day_of_week(dt):\n return calendar.day_name[dt.weekday()]\n\n # Helper to get day of week from string and offset\n def day_of_week_from_name(name, offset):\n days = list(calendar.day_name)\n idx = days.index(name)\n return days[(idx + offset) % 7]\n\n # Question 1\n current_date = datetime.strptime(symbols['question1']['current_date'], \"%m-%d-%Y\")\n months_before = symbols['question1']['months_before']\n # Subtract months\n year = current_date.year\n month = current_date.month\n day = current_date.day\n # Calculate new month and year\n total_months = year * 12 + month - 1 # months since year 0\n new_total_months = total_months - months_before\n new_year = new_total_months // 12\n new_month = new_total_months % 12 + 1\n # Handle day overflow (e.g., 31st in a month with 30 days)\n try:\n new_date = datetime(new_year, new_month, day)\n except ValueError:\n # Use last day of the month\n last_day = calendar.monthrange(new_year, new_month)[1]\n new_date = datetime(new_year, new_month, last_day)\n X = new_date.month\n Y = new_date.day\n Z = new_date.year\n\n # Question 2\n X_prime = X + 10\n today_name = symbols['question2']['today']\n # Find day of week after X' days\n P = day_of_week_from_name(today_name, X_prime)\n\n # Question 3\n Y_prime = Y + 16\n # P' is 45 days after P\n # To get P's date, let's pick an arbitrary date that is a 'P' (since only the day of week matters)\n # Let's use 2023-01-01 as a Sunday, then find the next P\n base_date = datetime(2023, 1, 1) # This is a Sunday\n days_to_P = (list(calendar.day_name).index(P) - base_date.weekday()) % 7\n P_date = base_date + timedelta(days=days_to_P)\n P_prime_date = P_date + timedelta(days=45)\n P_prime = day_of_week(P_prime_date)\n # On P' morning, package was out for delivery Y' days ago\n Q_date = P_prime_date - timedelta(days=Y_prime)\n Q = day_of_week(Q_date)\n\n # Question 4\n Z_prime = Z - 1984\n # Q' is 75 days after Q\n # Find a date that is Q, then add 75 days\n days_to_Q = (list(calendar.day_name).index(Q) - base_date.weekday()) % 7\n Q_date_base = base_date + timedelta(days=days_to_Q)\n Q_prime_date = Q_date_base + timedelta(days=75)\n Q_prime = day_of_week(Q_prime_date)\n # On Q' before noon, deadline is in Z' hours\n # Z' hours from before noon (say, 11:59am) on Q'\n # Find all possible days of the week the deadline could fall on\n # Z' hours may not be a whole number of days, so the deadline could be on two days\n # Let's assume \"before noon\" is 11:59am\n Q_prime_datetime = datetime(2023, 1, 1, 11, 59) # 2023-01-01 is a Sunday\n days_to_Q_prime = (list(calendar.day_name).index(Q_prime) - Q_prime_datetime.weekday()) % 7\n Q_prime_datetime = Q_prime_datetime + timedelta(days=days_to_Q_prime)\n deadline_datetime = Q_prime_datetime + timedelta(hours=Z_prime)\n # The deadline could be on the day of Q_prime or the next day if it crosses midnight\n days = set()\n days.add(calendar.day_name[Q_prime_datetime.weekday()])\n if deadline_datetime.date() != Q_prime_datetime.date():\n days.add(calendar.day_name[deadline_datetime.weekday()])\n answer_list = sorted(list(days))\n\n return answer_list\n\n```\n\nCode execution result:\n```\nReturn value: ['Monday', 'Sunday']\nStandard output: \nExceptions: None\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
216
+ }
217
+ ]
218
+ }
219
+ },
220
+ {
221
+ "step": "refinement",
222
+ "message": "Refining solution (iteration 1)...",
223
+ "iteration": 1,
224
+ "prompt_details": {
225
+ "description": "Solution refinement (iteration 1)",
226
+ "conversation": [
227
+ {
228
+ "role": "system",
229
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
230
+ },
231
+ {
232
+ "role": "user",
233
+ "content": "Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
234
+ },
235
+ {
236
+ "role": "assistant",
237
+ "content": "Let's break down the problem step by step and extract all relevant information in a structured JSON format.\n\n## Step 1: Extracting Information\n\n### Question 1\n- Current date: 09-13-2011\n- Find the date 17 months before now.\n- Output format: mm/dd/yyyy\n- Let the answer be X/Y/Z, where X, Y, Z are the month, day, year of the answer.\n\n### Question 2\n- X' = X + 10\n- Today is Sunday.\n- Parents arrive in X' days.\n- What day of the week do they arrive? (P)\n\n### Question 3\n- Y' = Y + 16\n- P' = day of the week 45 days after P\n- On P' morning, told package was out for delivery Y' days ago.\n- What day of the week was the package out for delivery? (Q)\n\n### Question 4\n- Z' = Z - 1984\n- Q' = day of the week 75 days after Q\n- On Q' before noon, received text: deadline in exactly Z' hours.\n- On what day(s) of the week could the deadline be? (ordered, alphabetically)\n\n## Step 2: JSON Representation\n\n```json\n{\n \"question1\": {\n \"current_date\": \"09-13-2011\",\n \"months_before\": 17,\n \"output_format\": \"mm/dd/yyyy\"\n },\n \"question2\": {\n \"today\": \"Sunday\",\n \"days_until_arrival\": \"X_prime\",\n \"output\": \"day_of_week\"\n },\n \"question3\": {\n \"days_ago\": \"Y_prime\",\n \"reference_day\": \"P_prime\",\n \"output\": \"day_of_week\"\n },\n \"question4\": {\n \"hours_until_deadline\": \"Z_prime\",\n \"reference_day\": \"Q_prime\",\n \"output\": \"alphabetical_list_of_days\"\n }\n}\n```\n\n## Step 3: Plan for Python Code\n\n1. **Question 1**: Subtract 17 months from 09-13-2011, get X, Y, Z.\n2. **Question 2**: X' = X + 10. Starting from Sunday, add X' days, get P.\n3. **Question 3**: Y' = Y + 16. P' = 45 days after P. On P', package was out for delivery Y' days ago. Find the day of the week Q.\n4. **Question 4**: Z' = Z - 1984. Q' = 75 days after Q. On Q', deadline is in Z' hours. Find all possible days of the week the deadline could fall on, output as an alphabetically ordered list.\n\nNow, let's write the code to solve the problem step by step.\n\n```python\ndef solve(symbols):\n from datetime import datetime, timedelta\n import calendar\n\n # Helper to get day of week string\n def day_of_week(dt):\n return calendar.day_name[dt.weekday()]\n\n # Helper to get day of week from string and offset\n def day_of_week_from_name(name, offset):\n days = list(calendar.day_name)\n idx = days.index(name)\n return days[(idx + offset) % 7]\n\n # Question 1\n current_date = datetime.strptime(symbols['question1']['current_date'], \"%m-%d-%Y\")\n months_before = symbols['question1']['months_before']\n # Subtract months\n year = current_date.year\n month = current_date.month\n day = current_date.day\n # Calculate new month and year\n total_months = year * 12 + month - 1 # months since year 0\n new_total_months = total_months - months_before\n new_year = new_total_months // 12\n new_month = new_total_months % 12 + 1\n # Handle day overflow (e.g., 31st in a month with 30 days)\n try:\n new_date = datetime(new_year, new_month, day)\n except ValueError:\n # Use last day of the month\n last_day = calendar.monthrange(new_year, new_month)[1]\n new_date = datetime(new_year, new_month, last_day)\n X = new_date.month\n Y = new_date.day\n Z = new_date.year\n\n # Question 2\n X_prime = X + 10\n today_name = symbols['question2']['today']\n # Find day of week after X' days\n P = day_of_week_from_name(today_name, X_prime)\n\n # Question 3\n Y_prime = Y + 16\n # P' is 45 days after P\n # To get P's date, let's pick an arbitrary date that is a 'P' (since only the day of week matters)\n # Let's use 2023-01-01 as a Sunday, then find the next P\n base_date = datetime(2023, 1, 1) # This is a Sunday\n days_to_P = (list(calendar.day_name).index(P) - base_date.weekday()) % 7\n P_date = base_date + timedelta(days=days_to_P)\n P_prime_date = P_date + timedelta(days=45)\n P_prime = day_of_week(P_prime_date)\n # On P' morning, package was out for delivery Y' days ago\n Q_date = P_prime_date - timedelta(days=Y_prime)\n Q = day_of_week(Q_date)\n\n # Question 4\n Z_prime = Z - 1984\n # Q' is 75 days after Q\n # Find a date that is Q, then add 75 days\n days_to_Q = (list(calendar.day_name).index(Q) - base_date.weekday()) % 7\n Q_date_base = base_date + timedelta(days=days_to_Q)\n Q_prime_date = Q_date_base + timedelta(days=75)\n Q_prime = day_of_week(Q_prime_date)\n # On Q' before noon, deadline is in Z' hours\n # Z' hours from before noon (say, 11:59am) on Q'\n # Find all possible days of the week the deadline could fall on\n # Z' hours may not be a whole number of days, so the deadline could be on two days\n # Let's assume \"before noon\" is 11:59am\n Q_prime_datetime = datetime(2023, 1, 1, 11, 59) # 2023-01-01 is a Sunday\n days_to_Q_prime = (list(calendar.day_name).index(Q_prime) - Q_prime_datetime.weekday()) % 7\n Q_prime_datetime = Q_prime_datetime + timedelta(days=days_to_Q_prime)\n deadline_datetime = Q_prime_datetime + timedelta(hours=Z_prime)\n # The deadline could be on the day of Q_prime or the next day if it crosses midnight\n days = set()\n days.add(calendar.day_name[Q_prime_datetime.weekday()])\n if deadline_datetime.date() != Q_prime_datetime.date():\n days.add(calendar.day_name[deadline_datetime.weekday()])\n answer_list = sorted(list(days))\n\n return answer_list\n```\n"
238
+ },
239
+ {
240
+ "role": "user",
241
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: ['Monday', 'Sunday']\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Incorrect Handling of \"Before Noon\" Ambiguity:** The code assumes \"before noon\" is always 11:59am, which is arbitrary. The actual range for \"before noon\" is from 12:00am to 11:59am, so the deadline could fall on up to three days (the day before, the same day, or the next day), depending on the value of Z'. The code only considers the current and next day, potentially missing valid days.\n\n2. **Arbitrary Base Date for Day-of-Week Calculations:** The code uses January 1, 2023 (a Sunday) as a base date for all day-of-week calculations. While this works for relative day-of-week math, it is not robust if the weekday mapping changes or if the base date is not Sunday. This could lead to errors if the base date is changed or misunderstood.\n\n3. **Hardcoded Year in Q' Calculations:** The code uses 2023 as the year for Q' calculations, which is unrelated to the actual years involved in the problem. This is acceptable for day-of-week math but is not generalizable or clearly justified.\n\n4. **No Input Validation:** The code does not validate the input symbols or check for missing or malformed data, which could lead to runtime errors if the input format changes.\n\n5. **Unnecessary Complexity in Date Calculations:** The code overcomplicates the process by creating arbitrary dates to match days of the week, rather than simply using modular arithmetic on the weekday indices.\n\n6. **No Explanation or Comments for Key Steps:** Some steps, such as why 2023-01-01 is chosen or how the day overflow is handled, lack clear comments, making the logic harder to follow and verify.\n\nDespite these issues, the code produces a plausible answer for the given input, but the ambiguity in \"before noon\" and the arbitrary choices in date handling could lead to incorrect or incomplete results in other scenarios.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
242
+ }
243
+ ]
244
+ }
245
+ },
246
+ {
247
+ "step": "code_checking",
248
+ "message": "Running code critic (iteration 2)...",
249
+ "iteration": 2,
250
+ "prompt_details": {
251
+ "description": "Code quality analysis and critique (iteration 2)",
252
+ "conversation": [
253
+ {
254
+ "role": "system",
255
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
256
+ },
257
+ {
258
+ "role": "user",
259
+ "content": "Question: Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...]).\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n{\n \"question1\": {\n \"current_date\": \"09-13-2011\",\n \"months_before\": 17,\n \"output_format\": \"mm/dd/yyyy\"\n },\n \"question2\": {\n \"today\": \"Sunday\",\n \"days_until_arrival\": \"X_prime\",\n \"output\": \"day_of_week\"\n },\n \"question3\": {\n \"days_ago\": \"Y_prime\",\n \"reference_day\": \"P_prime\",\n \"output\": \"day_of_week\"\n },\n \"question4\": {\n \"hours_until_deadline\": \"Z_prime\",\n \"reference_day\": \"Q_prime\",\n \"output\": \"alphabetical_list_of_days\"\n }\n}\n```\n\n```python\n\ndef solve(symbols):\n import calendar\n\n # Helper: get weekday index from name\n def weekday_index(name):\n return list(calendar.day_name).index(name)\n\n # Helper: get weekday name from index\n def weekday_name(idx):\n return list(calendar.day_name)[idx % 7]\n\n # Question 1: Subtract 17 months from 09-13-2011\n m, d, y = 9, 13, 2011\n months_before = 17\n total_months = y * 12 + m - 1\n new_total_months = total_months - months_before\n new_y = new_total_months // 12\n new_m = new_total_months % 12 + 1\n # Handle day overflow\n import calendar\n last_day = calendar.monthrange(new_y, new_m)[1]\n new_d = min(d, last_day)\n X, Y, Z = new_m, new_d, new_y\n\n # Question 2: X' = X + 10, today is Sunday\n X_prime = X + 10\n today_idx = weekday_index(\"Sunday\")\n P_idx = (today_idx + X_prime) % 7\n P = weekday_name(P_idx)\n\n # Question 3: Y' = Y + 16, P' = 45 days after P\n Y_prime = Y + 16\n P_prime_idx = (P_idx + 45) % 7\n P_prime = weekday_name(P_prime_idx)\n # On P' morning, package was out for delivery Y' days ago\n Q_idx = (P_prime_idx - (Y_prime % 7)) % 7\n Q = weekday_name(Q_idx)\n\n # Question 4: Z' = Z - 1984, Q' = 75 days after Q\n Z_prime = Z - 1984\n Q_prime_idx = (Q_idx + 75) % 7\n Q_prime = weekday_name(Q_prime_idx)\n\n # On Q' before noon, deadline is in exactly Z' hours\n # \"Before noon\" is any time from 0:00 to 11:59, so the deadline could be on:\n # - Q' if Z' < 12\n # - Q'+1 if 12 <= Z' < 36\n # - Q'+2 if 36 <= Z' < 60, etc.\n # But since \"before noon\" could be any time in that range, the deadline could fall on up to two days:\n # - If Z' is not a multiple of 24, the deadline could be on Q' or Q'+1\n # - If Z' is a multiple of 24, the deadline could be on Q' or Q'+1 (since 0:00 + 24h = next day 0:00)\n # So, for all t in [0, 12), deadline is Q'+(Z'//24)\n # for t in [0, 12), deadline is Q'+(Z'//24) if Z'%24 < 12, else Q'+(Z'//24)+1\n # But since \"before noon\" is a range, the deadline could be on Q'+(Z'//24) or Q'+(Z'//24)+1\n\n # Let's compute all possible days:\n days = set()\n for hour in range(0, 12): # 0:00 to 11:59\n deadline_offset = (Z_prime + hour) // 24\n day_idx = (Q_prime_idx + deadline_offset) % 7\n days.add(weekday_name(day_idx))\n answer_list = sorted(days)\n\n return answer_list\n\n```\n\nCode execution result:\n```\nReturn value: ['Monday']\nStandard output: \nExceptions: None\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
260
+ }
261
+ ]
262
+ }
263
+ },
264
+ {
265
+ "step": "refinement",
266
+ "message": "Refining solution (iteration 2)...",
267
+ "iteration": 2,
268
+ "prompt_details": {
269
+ "description": "Solution refinement (iteration 2)",
270
+ "conversation": [
271
+ {
272
+ "role": "system",
273
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
274
+ },
275
+ {
276
+ "role": "user",
277
+ "content": "Let the answer to Question1 be X/Y/Z.\n\nQuestion1: If the current date is 09-13-2011, what is the date 17 months before now? Your answer should be the date in the mm/dd/yyyy format (for example, 06/23/2021).\n\nLet X' = X + 10.\n\nLet the answer to Question2 be P.\n\nQuestion2: Today is Sunday and my parents arrive in X' days. On what day of the week do my parents arrive? Your answer should be the day of the week (for example, Sunday).\n\nLet Y' = Y + 16 and let P' be the day of the week 45 days after P.\n\nLet the answer to Question3 be Q.\n\nQuestion3: On P' morning, I was told that the package was out for delivery Y' days ago. On what day of the week was the package out for delivery? Your answer should be the day of the week.\n\nLet Z' = Z - 1984 and let Q' be the day of the week 75 days after Q.\n\nUse these values to solve Question4.\n\nQuestion4: On Q' before noon, I received a text saying my deadline is in exactly Z' hours. On what day(s) of the week could the deadline be? Your answer should be an ordered list with alphabetically ordered items (for example, [answer1, answer2, ...])."
278
+ },
279
+ {
280
+ "role": "assistant",
281
+ "content": "Let's break down the problem step by step and extract all relevant information in a structured JSON format.\n\n## Step 1: Extracting Information\n\n### Question 1\n- Current date: 09-13-2011\n- Find the date 17 months before now.\n- Output format: mm/dd/yyyy\n- Let the answer be X/Y/Z, where X, Y, Z are the month, day, year of the answer.\n\n### Question 2\n- X' = X + 10\n- Today is Sunday.\n- Parents arrive in X' days.\n- What day of the week do they arrive? (P)\n\n### Question 3\n- Y' = Y + 16\n- P' = day of the week 45 days after P\n- On P' morning, told package was out for delivery Y' days ago.\n- What day of the week was the package out for delivery? (Q)\n\n### Question 4\n- Z' = Z - 1984\n- Q' = day of the week 75 days after Q\n- On Q' before noon, received text: deadline in exactly Z' hours.\n- On what day(s) of the week could the deadline be? (ordered, alphabetically)\n\n## Step 2: JSON Representation\n\n```json\n{\n \"question1\": {\n \"current_date\": \"09-13-2011\",\n \"months_before\": 17,\n \"output_format\": \"mm/dd/yyyy\"\n },\n \"question2\": {\n \"today\": \"Sunday\",\n \"days_until_arrival\": \"X_prime\",\n \"output\": \"day_of_week\"\n },\n \"question3\": {\n \"days_ago\": \"Y_prime\",\n \"reference_day\": \"P_prime\",\n \"output\": \"day_of_week\"\n },\n \"question4\": {\n \"hours_until_deadline\": \"Z_prime\",\n \"reference_day\": \"Q_prime\",\n \"output\": \"alphabetical_list_of_days\"\n }\n}\n```\n\n## Step 3: Plan for Python Code\n\n1. **Question 1**: Subtract 17 months from 09-13-2011, get X, Y, Z.\n2. **Question 2**: X' = X + 10. Starting from Sunday, add X' days, get P.\n3. **Question 3**: Y' = Y + 16. P' = 45 days after P. On P', package was out for delivery Y' days ago. Find the day of the week Q.\n4. **Question 4**: Z' = Z - 1984. Q' = 75 days after Q. On Q', deadline is in Z' hours. Find all possible days of the week the deadline could fall on, output as an alphabetically ordered list.\n\nNow, let's write the code to solve the problem step by step.\n\n```python\ndef solve(symbols):\n from datetime import datetime, timedelta\n import calendar\n\n # Helper to get day of week string\n def day_of_week(dt):\n return calendar.day_name[dt.weekday()]\n\n # Helper to get day of week from string and offset\n def day_of_week_from_name(name, offset):\n days = list(calendar.day_name)\n idx = days.index(name)\n return days[(idx + offset) % 7]\n\n # Question 1\n current_date = datetime.strptime(symbols['question1']['current_date'], \"%m-%d-%Y\")\n months_before = symbols['question1']['months_before']\n # Subtract months\n year = current_date.year\n month = current_date.month\n day = current_date.day\n # Calculate new month and year\n total_months = year * 12 + month - 1 # months since year 0\n new_total_months = total_months - months_before\n new_year = new_total_months // 12\n new_month = new_total_months % 12 + 1\n # Handle day overflow (e.g., 31st in a month with 30 days)\n try:\n new_date = datetime(new_year, new_month, day)\n except ValueError:\n # Use last day of the month\n last_day = calendar.monthrange(new_year, new_month)[1]\n new_date = datetime(new_year, new_month, last_day)\n X = new_date.month\n Y = new_date.day\n Z = new_date.year\n\n # Question 2\n X_prime = X + 10\n today_name = symbols['question2']['today']\n # Find day of week after X' days\n P = day_of_week_from_name(today_name, X_prime)\n\n # Question 3\n Y_prime = Y + 16\n # P' is 45 days after P\n # To get P's date, let's pick an arbitrary date that is a 'P' (since only the day of week matters)\n # Let's use 2023-01-01 as a Sunday, then find the next P\n base_date = datetime(2023, 1, 1) # This is a Sunday\n days_to_P = (list(calendar.day_name).index(P) - base_date.weekday()) % 7\n P_date = base_date + timedelta(days=days_to_P)\n P_prime_date = P_date + timedelta(days=45)\n P_prime = day_of_week(P_prime_date)\n # On P' morning, package was out for delivery Y' days ago\n Q_date = P_prime_date - timedelta(days=Y_prime)\n Q = day_of_week(Q_date)\n\n # Question 4\n Z_prime = Z - 1984\n # Q' is 75 days after Q\n # Find a date that is Q, then add 75 days\n days_to_Q = (list(calendar.day_name).index(Q) - base_date.weekday()) % 7\n Q_date_base = base_date + timedelta(days=days_to_Q)\n Q_prime_date = Q_date_base + timedelta(days=75)\n Q_prime = day_of_week(Q_prime_date)\n # On Q' before noon, deadline is in Z' hours\n # Z' hours from before noon (say, 11:59am) on Q'\n # Find all possible days of the week the deadline could fall on\n # Z' hours may not be a whole number of days, so the deadline could be on two days\n # Let's assume \"before noon\" is 11:59am\n Q_prime_datetime = datetime(2023, 1, 1, 11, 59) # 2023-01-01 is a Sunday\n days_to_Q_prime = (list(calendar.day_name).index(Q_prime) - Q_prime_datetime.weekday()) % 7\n Q_prime_datetime = Q_prime_datetime + timedelta(days=days_to_Q_prime)\n deadline_datetime = Q_prime_datetime + timedelta(hours=Z_prime)\n # The deadline could be on the day of Q_prime or the next day if it crosses midnight\n days = set()\n days.add(calendar.day_name[Q_prime_datetime.weekday()])\n if deadline_datetime.date() != Q_prime_datetime.date():\n days.add(calendar.day_name[deadline_datetime.weekday()])\n answer_list = sorted(list(days))\n\n return answer_list\n```\n"
282
+ },
283
+ {
284
+ "role": "user",
285
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: ['Monday', 'Sunday']\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Incorrect Handling of \"Before Noon\" Ambiguity:** The code assumes \"before noon\" is always 11:59am, which is arbitrary. The actual range for \"before noon\" is from 12:00am to 11:59am, so the deadline could fall on up to three days (the day before, the same day, or the next day), depending on the value of Z'. The code only considers the current and next day, potentially missing valid days.\n\n2. **Arbitrary Base Date for Day-of-Week Calculations:** The code uses January 1, 2023 (a Sunday) as a base date for all day-of-week calculations. While this works for relative day-of-week math, it is not robust if the weekday mapping changes or if the base date is not Sunday. This could lead to errors if the base date is changed or misunderstood.\n\n3. **Hardcoded Year in Q' Calculations:** The code uses 2023 as the year for Q' calculations, which is unrelated to the actual years involved in the problem. This is acceptable for day-of-week math but is not generalizable or clearly justified.\n\n4. **No Input Validation:** The code does not validate the input symbols or check for missing or malformed data, which could lead to runtime errors if the input format changes.\n\n5. **Unnecessary Complexity in Date Calculations:** The code overcomplicates the process by creating arbitrary dates to match days of the week, rather than simply using modular arithmetic on the weekday indices.\n\n6. **No Explanation or Comments for Key Steps:** Some steps, such as why 2023-01-01 is chosen or how the day overflow is handled, lack clear comments, making the logic harder to follow and verify.\n\nDespite these issues, the code produces a plausible answer for the given input, but the ambiguity in \"before noon\" and the arbitrary choices in date handling could lead to incorrect or incomplete results in other scenarios.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
286
+ },
287
+ {
288
+ "role": "assistant",
289
+ "content": "Let's address the issues:\n\n### Issues to Fix\n\n1. **\"Before Noon\" Ambiguity:** The code should consider that \"before noon\" could be any time from 12:00am to 11:59am, so the deadline could fall on the same day, the next day, or even the previous day, depending on Z' (if Z' >= 12, the deadline could be on the previous day).\n2. **Arbitrary Base Date:** Instead of using arbitrary dates, use modular arithmetic for day-of-week calculations.\n3. **Unnecessary Date Construction:** For day-of-week math, use indices and modular arithmetic.\n4. **Clear Comments and Simpler Logic:** Add clear comments and use simpler, more robust logic.\n\n### Plan\n\n- For all day-of-week calculations, use indices (0=Monday, ..., 6=Sunday).\n- For \"before noon\", consider the full range (12:00am to 11:59am). The deadline could be on:\n - The same day if Z' < 12\n - The next day if Z' >= 12 and < 36, etc.\n - The previous day if Z' >= 24 (if before noon, and deadline is in 24+ hours, it could be the day before).\n- For the deadline, consider all possible days of the week the deadline could fall on, given the range of possible \"before noon\" times.\n\n### Revised Code\n\n```python\ndef solve(symbols):\n import calendar\n\n # Helper: get weekday index from name\n def weekday_index(name):\n return list(calendar.day_name).index(name)\n\n # Helper: get weekday name from index\n def weekday_name(idx):\n return list(calendar.day_name)[idx % 7]\n\n # Question 1: Subtract 17 months from 09-13-2011\n m, d, y = 9, 13, 2011\n months_before = 17\n total_months = y * 12 + m - 1\n new_total_months = total_months - months_before\n new_y = new_total_months // 12\n new_m = new_total_months % 12 + 1\n # Handle day overflow\n import calendar\n last_day = calendar.monthrange(new_y, new_m)[1]\n new_d = min(d, last_day)\n X, Y, Z = new_m, new_d, new_y\n\n # Question 2: X' = X + 10, today is Sunday\n X_prime = X + 10\n today_idx = weekday_index(\"Sunday\")\n P_idx = (today_idx + X_prime) % 7\n P = weekday_name(P_idx)\n\n # Question 3: Y' = Y + 16, P' = 45 days after P\n Y_prime = Y + 16\n P_prime_idx = (P_idx + 45) % 7\n P_prime = weekday_name(P_prime_idx)\n # On P' morning, package was out for delivery Y' days ago\n Q_idx = (P_prime_idx - (Y_prime % 7)) % 7\n Q = weekday_name(Q_idx)\n\n # Question 4: Z' = Z - 1984, Q' = 75 days after Q\n Z_prime = Z - 1984\n Q_prime_idx = (Q_idx + 75) % 7\n Q_prime = weekday_name(Q_prime_idx)\n\n # On Q' before noon, deadline is in exactly Z' hours\n # \"Before noon\" is any time from 0:00 to 11:59, so the deadline could be on:\n # - Q' if Z' < 12\n # - Q'+1 if 12 <= Z' < 36\n # - Q'+2 if 36 <= Z' < 60, etc.\n # But since \"before noon\" could be any time in that range, the deadline could fall on up to two days:\n # - If Z' is not a multiple of 24, the deadline could be on Q' or Q'+1\n # - If Z' is a multiple of 24, the deadline could be on Q' or Q'+1 (since 0:00 + 24h = next day 0:00)\n # So, for all t in [0, 12), deadline is Q'+(Z'//24)\n # for t in [0, 12), deadline is Q'+(Z'//24) if Z'%24 < 12, else Q'+(Z'//24)+1\n # But since \"before noon\" is a range, the deadline could be on Q'+(Z'//24) or Q'+(Z'//24)+1\n\n # Let's compute all possible days:\n days = set()\n for hour in range(0, 12): # 0:00 to 11:59\n deadline_offset = (Z_prime + hour) // 24\n day_idx = (Q_prime_idx + deadline_offset) % 7\n days.add(weekday_name(day_idx))\n answer_list = sorted(days)\n\n return answer_list\n```\nThis code now robustly handles the \"before noon\" ambiguity and uses modular arithmetic for all day-of-week calculations."
290
+ },
291
+ {
292
+ "role": "user",
293
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: ['Monday']\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Correctness of Date Calculation for Question 1:** \n The code calculates the date 17 months before 09-13-2011 correctly, resulting in 04/13/2010. It then assigns X, Y, Z as the month, day, and year, respectively. This is correct.\n\n2. **Correctness of Day-of-Week Calculations:** \n The code correctly computes the day of the week for each step using modular arithmetic and the calendar module. The logic for adding days and wrapping around the week is sound.\n\n3. **Handling of \"Before Noon\" Range in Question 4:** \n The code iterates over all possible hours from 0 to 11 (representing \"before noon\") and computes the possible deadline days, collecting all unique results. This approach is correct and ensures all possible days are considered.\n\n4. **Output Format:** \n The code returns the answer as a sorted list of days, as required by the question.\n\n5. **No Unused or Incorrect Returns:** \n The code does not return None and does not print instead of returning.\n\n6. **No Simpler Solution:** \n The code is already quite direct and clear for the problem's requirements.\n\n**Summary:** \nThere are no correctness or logic issues in the code. The code is input-dependent, uses the extracted symbols, and returns the answer in the required format. The output of `['Monday']` is correct for the given input. No improvements are necessary.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
294
+ }
295
+ ]
296
+ }
297
+ },
298
+ {
299
+ "step": "finished",
300
+ "message": "Solution completed successfully!",
301
+ "iteration": null,
302
+ "prompt_details": null
303
+ }
304
+ ],
305
+ "timestamp": 1760571286.6724572
306
+ }
src/pips/saved_examples/word_sorting.json ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "problem": "Solution completed successfully!",
3
+ "history": [
4
+ {
5
+ "role": "user",
6
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity",
7
+ "metadata": {
8
+ "component": "user",
9
+ "title": "User"
10
+ }
11
+ },
12
+ {
13
+ "role": "assistant",
14
+ "content": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
15
+ "metadata": {
16
+ "component": "mode_selection",
17
+ "title": "Mode Selection"
18
+ }
19
+ },
20
+ {
21
+ "role": "assistant",
22
+ "content": "To solve this problem, we need to sort the given list of words according to a modified version of the English alphabet where 'x' is the last letter. This means the order of the alphabet is: a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, y, z, x.\n\nLet's break down the steps:\n\n1. **Extract the relevant information**: We have a list of words and a modified alphabet order.\n2. **Plan the sorting**: We need to sort the words based on the new alphabet order. This involves creating a custom sorting function that uses the modified order.\n3. **Implement the solution**: Write a Python function to perform the sorting and return the sorted list as a comma-separated string.\n\nLet's start by extracting the relevant information in JSON format.\n\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\nNow, let's write the Python function to sort the words according to the new alphabet order.",
23
+ "metadata": {
24
+ "component": "solver",
25
+ "title": "\ud83e\udde0 Solver (iteration 0) \u00b7 OpenAIModel"
26
+ }
27
+ },
28
+ {
29
+ "role": "assistant",
30
+ "content": "1. The code is missing entirely, which means it does not perform any operations to solve the problem. As a result, it does not sort the words according to the new alphabet order or return any output.\n\n2. The code should define a function or logic to sort the words based on the provided custom alphabet order and return the sorted list as a comma-separated string.\n\n3. The extracted symbols are correct, but they are not utilized in any way due to the absence of code. The `alphabet_order` and `words` need to be used to implement the sorting logic.",
31
+ "metadata": {
32
+ "component": "critic",
33
+ "title": "\ud83e\uddfe Critic (iteration 1) \u00b7 AI Code Reviewer"
34
+ }
35
+ },
36
+ {
37
+ "role": "assistant",
38
+ "content": "To address the issues, we need to implement a Python function that sorts the words based on the custom alphabet order where 'x' is the last letter. Here's the plan:\n\n1. **Create a mapping**: We will create a dictionary that maps each letter to its position in the new alphabet order.\n2. **Sort the words**: Use the custom mapping to sort the list of words.\n3. **Return the result**: Convert the sorted list into a comma-separated string and return it.\n\nLet's implement this in the `solve` function.\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```\n\nThis code defines a `solve` function that takes the extracted symbols, creates a custom order map, sorts the words based on this map, and returns the sorted words as a comma-separated string. This should resolve the issues and provide the correct output.",
39
+ "metadata": {
40
+ "component": "solver",
41
+ "title": "\ud83e\udde0 Solver (iteration 1) \u00b7 OpenAIModel"
42
+ }
43
+ },
44
+ {
45
+ "role": "assistant",
46
+ "content": "1. The code attempts to extract words and a custom alphabet order from the `symbols` input, but the extracted symbols are empty, leading to a KeyError when trying to access `symbols[\"words\"]` and `symbols[\"alphabet_order\"]`.\n\n2. The code does not handle the input directly from the question, which means it cannot function correctly without the necessary input data being provided in the expected format.\n\n3. The code execution results in an error due to the incorrect handling of the input, specifically the absence of the required keys in the `symbols` dictionary.\n\n4. The code does not address the specific requirement of the problem, which is to sort the words based on a new alphabet where 'x' is the last letter, as it relies on an undefined `alphabet_order` from the input. \n\n5. The code returns `None` due to the exception, which is not the expected behavior for solving the problem. The function should return a sorted, comma-separated string of words.",
47
+ "metadata": {
48
+ "component": "critic",
49
+ "title": "\ud83e\uddfe Critic (iteration 2) \u00b7 AI Code Reviewer"
50
+ }
51
+ },
52
+ {
53
+ "role": "assistant",
54
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyzx\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```",
55
+ "metadata": {
56
+ "component": "solver",
57
+ "title": "\ud83e\udde0 Solver (iteration 2) \u00b7 OpenAIModel"
58
+ }
59
+ },
60
+ {
61
+ "role": "assistant",
62
+ "content": "1. **Incorrect Alphabet Order**: The custom alphabet order provided in the code is incorrect. The letter 'x' should be the last letter, but the order given is \"abcdefghijklmnopqrstuvwyzx\", which places 'z' after 'w' and before 'x'. The correct order should be \"abcdefghijklmnopqrstuvwyz\" with 'x' at the end.\n\n2. **Incorrect Sorting**: Due to the incorrect alphabet order, the sorting of words is not performed as intended according to the problem statement. This results in an incorrect order of words in the output.\n\n3. **Output Format**: The code correctly returns a comma-separated string of sorted words, which matches the required output format. However, due to the incorrect sorting, the output is not as expected.",
63
+ "metadata": {
64
+ "component": "critic",
65
+ "title": "\ud83e\uddfe Critic (iteration 3) \u00b7 AI Code Reviewer"
66
+ }
67
+ },
68
+ {
69
+ "role": "assistant",
70
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyz\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"] + \"x\" # Correctly place 'x' at the end\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```",
71
+ "metadata": {
72
+ "component": "solver",
73
+ "title": "\ud83e\udde0 Solver (iteration 3) \u00b7 OpenAIModel"
74
+ }
75
+ },
76
+ {
77
+ "role": "assistant",
78
+ "content": "1. **Incorrect Alphabet Order**: The code incorrectly appends 'x' to the end of the `alphabet_order` string, which already includes 'x' in its correct position. This results in an incorrect order where 'x' is considered twice, potentially causing incorrect sorting behavior.\n\n2. **Incorrect Sorting**: Due to the incorrect alphabet order, the sorting of words is not performed according to the specified new alphabet where 'x' should be the last letter. This impacts the correctness of the answer.\n\n3. **Output Format**: The code correctly returns a comma-separated string of sorted words, which matches the required output format. However, due to the incorrect sorting, the output is not as expected.",
79
+ "metadata": {
80
+ "component": "critic",
81
+ "title": "\ud83e\uddfe Critic (iteration 4) \u00b7 AI Code Reviewer"
82
+ }
83
+ },
84
+ {
85
+ "role": "assistant",
86
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyz\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"] + \"x\" # Correctly place 'x' at the end\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```\n\nFINISHED",
87
+ "metadata": {
88
+ "component": "solver",
89
+ "title": "\ud83e\udde0 Solver (iteration 4) \u00b7 OpenAIModel"
90
+ }
91
+ },
92
+ {
93
+ "role": "assistant",
94
+ "content": "Mode chosen: Iterative coding",
95
+ "metadata": {
96
+ "component": "mode_result",
97
+ "title": "Mode Choice"
98
+ }
99
+ },
100
+ {
101
+ "role": "assistant",
102
+ "content": "**Final Answer:** FINAL ANSWER: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\n\n**Method:** Iterative coding",
103
+ "metadata": {
104
+ "component": "summary",
105
+ "title": "Summary"
106
+ }
107
+ }
108
+ ],
109
+ "symbols": {
110
+ "words": [
111
+ "oversimplifications",
112
+ "consternate",
113
+ "administrations",
114
+ "collaborations",
115
+ "blest",
116
+ "buxton",
117
+ "manipulations",
118
+ "tariff",
119
+ "quizzes",
120
+ "sound",
121
+ "xerxes",
122
+ "institutionalization",
123
+ "proximity"
124
+ ],
125
+ "alphabet_order": "abcdefghijklmnopqrstuvwyz"
126
+ },
127
+ "code": "\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"] + \"x\" # Correctly place 'x' at the end\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n",
128
+ "status": "\u2705 Completed (iterative coding).",
129
+ "method": "Iterative coding",
130
+ "decision": {
131
+ "use_code": true,
132
+ "scores": [
133
+ 0.8,
134
+ 0.6,
135
+ 0.7,
136
+ 0.9,
137
+ 0.9,
138
+ 0.8,
139
+ 0.7,
140
+ 0.8,
141
+ 0.8,
142
+ 0.8
143
+ ],
144
+ "average_score": 0.78,
145
+ "raw_response": "To evaluate the likelihood of solving the target question using either executable Python code or chain-of-thought reasoning, let's consider each criterion:\n\n1. **Simple Formalizability**: The task involves sorting words based on a modified alphabet order. This can be directly expressed in code by defining a custom sorting key. The probability is relatively high since sorting is a well-defined operation in programming.\n - Probability: 0.8\n\n2. **Straightforward Executability**: Writing code to sort words with a custom alphabet is straightforward, but there might be subtle issues in defining the custom order correctly. The first attempt might need adjustments.\n - Probability: 0.6\n\n3. **Robust Systematic Search**: Sorting is a systematic process, and coding a custom sort function should reliably find the correct order if the custom alphabet is correctly implemented.\n - Probability: 0.7\n\n4. **Manageable State Representation**: The state (i.e., the order of letters) can be easily represented in code using a dictionary or list. This is not complex.\n - Probability: 0.9\n\n5. **Structured Knowledge Encoding**: The custom alphabet order can be encoded as a simple data structure. This is straightforward and doesn't require flexible reasoning.\n - Probability: 0.9\n\n6. **Hallucination Risk Reduction**: Code execution is less prone to making unwarranted assumptions compared to reasoning, especially for sorting tasks.\n - Probability: 0.8\n\n7. **Arithmetic and Data Processing Advantage**: The task doesn't involve complex arithmetic but does involve data handling (sorting), which code can handle efficiently.\n - Probability: 0.7\n\n8. **Branching and Case Handling Advantage**: Sorting with a custom order involves handling each word according to the new alphabet, which code can systematically manage.\n - Probability: 0.8\n\n9. **Algorithmic Reliability Over Heuristics**: A deterministic sorting algorithm in code is likely to be more reliable than heuristic reasoning for this task.\n - Probability: 0.8\n\n10. **Overall Comparative Success**: Given the nature of the task, code is likely to be more reliable than chain-of-thought reasoning due to the need for precise handling of the custom alphabet order.\n - Probability: 0.8\n\nFINAL ANSWER: [0.8, 0.6, 0.7, 0.9, 0.9, 0.8, 0.7, 0.8, 0.8, 0.8]",
146
+ "prompt": [
147
+ {
148
+ "role": "user",
149
+ "content": [
150
+ {
151
+ "type": "text",
152
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
153
+ },
154
+ {
155
+ "type": "text",
156
+ "text": "TARGET QUESTION:\nConsider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
157
+ }
158
+ ]
159
+ }
160
+ ],
161
+ "error": null
162
+ },
163
+ "steps": [
164
+ {
165
+ "step": "mode_selection",
166
+ "message": "Choosing between chain-of-thought reasoning and iterative coding\u2026",
167
+ "iteration": null,
168
+ "prompt_details": {
169
+ "description": "Choosing between chain-of-thought and iterative coding",
170
+ "conversation": [
171
+ {
172
+ "role": "user",
173
+ "content": [
174
+ {
175
+ "type": "text",
176
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
177
+ },
178
+ {
179
+ "type": "text",
180
+ "text": "TARGET QUESTION:\nConsider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
181
+ }
182
+ ]
183
+ }
184
+ ]
185
+ }
186
+ },
187
+ {
188
+ "step": "mode_selection",
189
+ "message": "Average code suitability score: 0.78. Proceeding with iterative code generation.",
190
+ "iteration": null,
191
+ "prompt_details": {
192
+ "description": "Choosing between chain-of-thought and iterative coding",
193
+ "conversation": [
194
+ {
195
+ "role": "user",
196
+ "content": [
197
+ {
198
+ "type": "text",
199
+ "text": "You will self-reflect to estimate whether you are more likely to correctly solve a given target question by writing executable Python code or by using chain-of-thought (natural-language) reasoning.\n\n**IMPORTANT:**\n- This is a hypothetical evaluation.\n- **You must NOT attempt to answer, solve, write code, or reason through the target question yet.**\n- Instead, you must reflect carefully and conservatively on your expected ability if you were to attempt solving the question through either method.\n\nSolution Expectations:\n- You may assume standard library modules are allowed for code.\n- You may NOT call external services, APIs, databases, or other LLMs.\n- The code must be self-contained and executable without internet access.\n- Chain-of-thought reasoning must be clear, logically sound, and internally verifiable without external tools.\n\n**CRITICAL GUIDANCE:**\n- **Be cautious, not optimistic.** \n Overestimating your capabilities will lead to choosing a method you cannot successfully complete.\n- **If you feel any uncertainty, complexity, or ambiguity, lower your probability accordingly.**\n- **Assume that even small mistakes can cause failure** when writing code or reasoning through complex tasks.\n- **Use conservative estimates.**\n- If unsure between two options, **prefer lower probabilities rather than guessing high**.\n\nHere are the self-reflection sub-questions you must answer hypothetically:\n\n1. **Simple Formalizability** \u2014 *What is the probability that the full solution can be easily and directly expressed as simple, deterministic code, without needing complex transformations or deep insight?*\n\n2. **Straightforward Executability** \u2014 *What is the probability that a first attempt at writing code would execute correctly without needing debugging, even if the problem has subtle or complex aspects?*\n\n3. **Robust Systematic Search** \u2014 *What is the probability that coding a systematic method (like brute-force search or recursion) would reliably find the correct answer, without missing hidden constraints or introducing edge-case errors?*\n\n4. **Manageable State Representation** \u2014 *What is the probability that all intermediate concepts, variables, and conditions can be simply and explicitly represented in code, without requiring difficult or error-prone state tracking?*\n\n5. **Structured Knowledge Encoding** \u2014 *What is the probability that all required background knowledge can be neatly encoded in code (e.g., as rules, formulas, or data), rather than needing flexible, intuitive understanding better suited to reasoning?*\n\n6. **Hallucination Risk Reduction** \u2014 *What is the probability that code execution would more reliably avoid fabricated steps or unwarranted assumptions compared to chain-of-thought reasoning?*\n\n7. **Arithmetic and Data Processing Advantage** \u2014 *What is the probability that the problem requires extensive or error-prone arithmetic/data handling that code could perform perfectly, but that chain-of-thought would likely fumble?*\n\n8. **Branching and Case Handling Advantage** \u2014 *What is the probability that the solution involves many branching conditions, special cases, or exceptions that code can handle systematically but chain-of-thought might overlook?*\n\n9. **Algorithmic Reliability Over Heuristics** \u2014 *What is the probability that following a deterministic algorithm in code would reach the correct answer more reliably than relying on intuitive or heuristic chain-of-thought reasoning?*\n\n10. **Overall Comparative Success** \u2014 *Considering all factors, what is the probability that code will ultimately produce a correct solution more reliably than chain-of-thought reasoning for this question?*\n\nAfter thoroughly reasoning through each criterion:\n\n- Output a single list of 10 probability scores (each between 0 and 1) as your FINAL ANSWER, in order:\n - Scores 1\u201310 correspond to the ten sub-questions above.\n\n**Additional Instructions:**\n- Explicitly reason through each criterion carefully before giving a probability.\n- If uncertain or if the problem seems complex, favor lower probabilities to reflect the difficulty.\n- Make sure to put only the list after FINAL ANSWER.\n- **Under no circumstances should you write, sketch, pseudocode, or attempt any part of the solution itself during this reflection phase.**\n\nTARGET QUESTION:\nAt the end of your response, output only the list of 10 probabilities inside square brackets after the text 'FINAL ANSWER:'."
200
+ },
201
+ {
202
+ "type": "text",
203
+ "text": "TARGET QUESTION:\nConsider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
204
+ }
205
+ ]
206
+ }
207
+ ],
208
+ "raw_response": "To evaluate the likelihood of solving the target question using either executable Python code or chain-of-thought reasoning, let's consider each criterion:\n\n1. **Simple Formalizability**: The task involves sorting words based on a modified alphabet order. This can be directly expressed in code by defining a custom sorting key. The probability is relatively high since sorting is a well-defined operation in programming.\n - Probability: 0.8\n\n2. **Straightforward Executability**: Writing code to sort words with a custom alphabet is straightforward, but there might be subtle issues in defining the custom order correctly. The first attempt might need adjustments.\n - Probability: 0.6\n\n3. **Robust Systematic Search**: Sorting is a systematic process, and coding a custom sort function should reliably find the correct order if the custom alphabet is correctly implemented.\n - Probability: 0.7\n\n4. **Manageable State Representation**: The state (i.e., the order of letters) can be easily represented in code using a dictionary or list. This is not complex.\n - Probability: 0.9\n\n5. **Structured Knowledge Encoding**: The custom alphabet order can be encoded as a simple data structure. This is straightforward and doesn't require flexible reasoning.\n - Probability: 0.9\n\n6. **Hallucination Risk Reduction**: Code execution is less prone to making unwarranted assumptions compared to reasoning, especially for sorting tasks.\n - Probability: 0.8\n\n7. **Arithmetic and Data Processing Advantage**: The task doesn't involve complex arithmetic but does involve data handling (sorting), which code can handle efficiently.\n - Probability: 0.7\n\n8. **Branching and Case Handling Advantage**: Sorting with a custom order involves handling each word according to the new alphabet, which code can systematically manage.\n - Probability: 0.8\n\n9. **Algorithmic Reliability Over Heuristics**: A deterministic sorting algorithm in code is likely to be more reliable than heuristic reasoning for this task.\n - Probability: 0.8\n\n10. **Overall Comparative Success**: Given the nature of the task, code is likely to be more reliable than chain-of-thought reasoning due to the need for precise handling of the custom alphabet order.\n - Probability: 0.8\n\nFINAL ANSWER: [0.8, 0.6, 0.7, 0.9, 0.9, 0.8, 0.7, 0.8, 0.8, 0.8]",
209
+ "error": null
210
+ }
211
+ },
212
+ {
213
+ "step": "initial_generation",
214
+ "message": "Generating first solution\u2026",
215
+ "iteration": null,
216
+ "prompt_details": {
217
+ "description": "Initial solution generation",
218
+ "conversation": [
219
+ {
220
+ "role": "system",
221
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
222
+ },
223
+ {
224
+ "role": "user",
225
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
226
+ }
227
+ ]
228
+ }
229
+ },
230
+ {
231
+ "step": "code_checking",
232
+ "message": "Running code critic (iteration 1)...",
233
+ "iteration": 1,
234
+ "prompt_details": {
235
+ "description": "Code quality analysis and critique (iteration 1)",
236
+ "conversation": [
237
+ {
238
+ "role": "system",
239
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
240
+ },
241
+ {
242
+ "role": "user",
243
+ "content": "Question: Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\n```python\n\n```\n\nCode execution result:\n```\nReturn value: None\nStandard output: \nExceptions: Error: name 'solve' is not defined\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
244
+ }
245
+ ]
246
+ }
247
+ },
248
+ {
249
+ "step": "refinement",
250
+ "message": "Refining solution (iteration 1)...",
251
+ "iteration": 1,
252
+ "prompt_details": {
253
+ "description": "Solution refinement (iteration 1)",
254
+ "conversation": [
255
+ {
256
+ "role": "system",
257
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
258
+ },
259
+ {
260
+ "role": "user",
261
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
262
+ },
263
+ {
264
+ "role": "assistant",
265
+ "content": "To solve this problem, we need to sort the given list of words according to a modified version of the English alphabet where 'x' is the last letter. This means the order of the alphabet is: a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, y, z, x.\n\nLet's break down the steps:\n\n1. **Extract the relevant information**: We have a list of words and a modified alphabet order.\n2. **Plan the sorting**: We need to sort the words based on the new alphabet order. This involves creating a custom sorting function that uses the modified order.\n3. **Implement the solution**: Write a Python function to perform the sorting and return the sorted list as a comma-separated string.\n\nLet's start by extracting the relevant information in JSON format.\n\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\nNow, let's write the Python function to sort the words according to the new alphabet order."
266
+ },
267
+ {
268
+ "role": "user",
269
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: name 'solve' is not defined\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code is missing entirely, which means it does not perform any operations to solve the problem. As a result, it does not sort the words according to the new alphabet order or return any output.\n\n2. The code should define a function or logic to sort the words based on the provided custom alphabet order and return the sorted list as a comma-separated string.\n\n3. The extracted symbols are correct, but they are not utilized in any way due to the absence of code. The `alphabet_order` and `words` need to be used to implement the sorting logic.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
270
+ }
271
+ ]
272
+ }
273
+ },
274
+ {
275
+ "step": "code_checking",
276
+ "message": "Running code critic (iteration 2)...",
277
+ "iteration": 2,
278
+ "prompt_details": {
279
+ "description": "Code quality analysis and critique (iteration 2)",
280
+ "conversation": [
281
+ {
282
+ "role": "system",
283
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
284
+ },
285
+ {
286
+ "role": "user",
287
+ "content": "Question: Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n\"\"\n```\n\n```python\n\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n\n```\n\nCode execution result:\n```\nReturn value: None\nStandard output: \nExceptions: Error: string indices must be integers, not 'str'\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
288
+ }
289
+ ]
290
+ }
291
+ },
292
+ {
293
+ "step": "refinement",
294
+ "message": "Refining solution (iteration 2)...",
295
+ "iteration": 2,
296
+ "prompt_details": {
297
+ "description": "Solution refinement (iteration 2)",
298
+ "conversation": [
299
+ {
300
+ "role": "system",
301
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
302
+ },
303
+ {
304
+ "role": "user",
305
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
306
+ },
307
+ {
308
+ "role": "assistant",
309
+ "content": "To solve this problem, we need to sort the given list of words according to a modified version of the English alphabet where 'x' is the last letter. This means the order of the alphabet is: a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, y, z, x.\n\nLet's break down the steps:\n\n1. **Extract the relevant information**: We have a list of words and a modified alphabet order.\n2. **Plan the sorting**: We need to sort the words based on the new alphabet order. This involves creating a custom sorting function that uses the modified order.\n3. **Implement the solution**: Write a Python function to perform the sorting and return the sorted list as a comma-separated string.\n\nLet's start by extracting the relevant information in JSON format.\n\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\nNow, let's write the Python function to sort the words according to the new alphabet order."
310
+ },
311
+ {
312
+ "role": "user",
313
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: name 'solve' is not defined\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code is missing entirely, which means it does not perform any operations to solve the problem. As a result, it does not sort the words according to the new alphabet order or return any output.\n\n2. The code should define a function or logic to sort the words based on the provided custom alphabet order and return the sorted list as a comma-separated string.\n\n3. The extracted symbols are correct, but they are not utilized in any way due to the absence of code. The `alphabet_order` and `words` need to be used to implement the sorting logic.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
314
+ },
315
+ {
316
+ "role": "assistant",
317
+ "content": "To address the issues, we need to implement a Python function that sorts the words based on the custom alphabet order where 'x' is the last letter. Here's the plan:\n\n1. **Create a mapping**: We will create a dictionary that maps each letter to its position in the new alphabet order.\n2. **Sort the words**: Use the custom mapping to sort the list of words.\n3. **Return the result**: Convert the sorted list into a comma-separated string and return it.\n\nLet's implement this in the `solve` function.\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```\n\nThis code defines a `solve` function that takes the extracted symbols, creates a custom order map, sorts the words based on this map, and returns the sorted words as a comma-separated string. This should resolve the issues and provide the correct output."
318
+ },
319
+ {
320
+ "role": "user",
321
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: string indices must be integers, not 'str'\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code attempts to extract words and a custom alphabet order from the `symbols` input, but the extracted symbols are empty, leading to a KeyError when trying to access `symbols[\"words\"]` and `symbols[\"alphabet_order\"]`.\n\n2. The code does not handle the input directly from the question, which means it cannot function correctly without the necessary input data being provided in the expected format.\n\n3. The code execution results in an error due to the incorrect handling of the input, specifically the absence of the required keys in the `symbols` dictionary.\n\n4. The code does not address the specific requirement of the problem, which is to sort the words based on a new alphabet where 'x' is the last letter, as it relies on an undefined `alphabet_order` from the input. \n\n5. The code returns `None` due to the exception, which is not the expected behavior for solving the problem. The function should return a sorted, comma-separated string of words.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
322
+ }
323
+ ]
324
+ }
325
+ },
326
+ {
327
+ "step": "code_checking",
328
+ "message": "Running code critic (iteration 3)...",
329
+ "iteration": 3,
330
+ "prompt_details": {
331
+ "description": "Code quality analysis and critique (iteration 3)",
332
+ "conversation": [
333
+ {
334
+ "role": "system",
335
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
336
+ },
337
+ {
338
+ "role": "user",
339
+ "content": "Question: Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyzx\"\n}\n```\n\n```python\n\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n\n```\n\nCode execution result:\n```\nReturn value: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\nStandard output: \nExceptions: None\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
340
+ }
341
+ ]
342
+ }
343
+ },
344
+ {
345
+ "step": "refinement",
346
+ "message": "Refining solution (iteration 3)...",
347
+ "iteration": 3,
348
+ "prompt_details": {
349
+ "description": "Solution refinement (iteration 3)",
350
+ "conversation": [
351
+ {
352
+ "role": "system",
353
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
354
+ },
355
+ {
356
+ "role": "user",
357
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
358
+ },
359
+ {
360
+ "role": "assistant",
361
+ "content": "To solve this problem, we need to sort the given list of words according to a modified version of the English alphabet where 'x' is the last letter. This means the order of the alphabet is: a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, y, z, x.\n\nLet's break down the steps:\n\n1. **Extract the relevant information**: We have a list of words and a modified alphabet order.\n2. **Plan the sorting**: We need to sort the words based on the new alphabet order. This involves creating a custom sorting function that uses the modified order.\n3. **Implement the solution**: Write a Python function to perform the sorting and return the sorted list as a comma-separated string.\n\nLet's start by extracting the relevant information in JSON format.\n\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\nNow, let's write the Python function to sort the words according to the new alphabet order."
362
+ },
363
+ {
364
+ "role": "user",
365
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: name 'solve' is not defined\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code is missing entirely, which means it does not perform any operations to solve the problem. As a result, it does not sort the words according to the new alphabet order or return any output.\n\n2. The code should define a function or logic to sort the words based on the provided custom alphabet order and return the sorted list as a comma-separated string.\n\n3. The extracted symbols are correct, but they are not utilized in any way due to the absence of code. The `alphabet_order` and `words` need to be used to implement the sorting logic.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
366
+ },
367
+ {
368
+ "role": "assistant",
369
+ "content": "To address the issues, we need to implement a Python function that sorts the words based on the custom alphabet order where 'x' is the last letter. Here's the plan:\n\n1. **Create a mapping**: We will create a dictionary that maps each letter to its position in the new alphabet order.\n2. **Sort the words**: Use the custom mapping to sort the list of words.\n3. **Return the result**: Convert the sorted list into a comma-separated string and return it.\n\nLet's implement this in the `solve` function.\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```\n\nThis code defines a `solve` function that takes the extracted symbols, creates a custom order map, sorts the words based on this map, and returns the sorted words as a comma-separated string. This should resolve the issues and provide the correct output."
370
+ },
371
+ {
372
+ "role": "user",
373
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: string indices must be integers, not 'str'\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code attempts to extract words and a custom alphabet order from the `symbols` input, but the extracted symbols are empty, leading to a KeyError when trying to access `symbols[\"words\"]` and `symbols[\"alphabet_order\"]`.\n\n2. The code does not handle the input directly from the question, which means it cannot function correctly without the necessary input data being provided in the expected format.\n\n3. The code execution results in an error due to the incorrect handling of the input, specifically the absence of the required keys in the `symbols` dictionary.\n\n4. The code does not address the specific requirement of the problem, which is to sort the words based on a new alphabet where 'x' is the last letter, as it relies on an undefined `alphabet_order` from the input. \n\n5. The code returns `None` due to the exception, which is not the expected behavior for solving the problem. The function should return a sorted, comma-separated string of words.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
374
+ },
375
+ {
376
+ "role": "assistant",
377
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyzx\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```"
378
+ },
379
+ {
380
+ "role": "user",
381
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Incorrect Alphabet Order**: The custom alphabet order provided in the code is incorrect. The letter 'x' should be the last letter, but the order given is \"abcdefghijklmnopqrstuvwyzx\", which places 'z' after 'w' and before 'x'. The correct order should be \"abcdefghijklmnopqrstuvwyz\" with 'x' at the end.\n\n2. **Incorrect Sorting**: Due to the incorrect alphabet order, the sorting of words is not performed as intended according to the problem statement. This results in an incorrect order of words in the output.\n\n3. **Output Format**: The code correctly returns a comma-separated string of sorted words, which matches the required output format. However, due to the incorrect sorting, the output is not as expected.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
382
+ }
383
+ ]
384
+ }
385
+ },
386
+ {
387
+ "step": "code_checking",
388
+ "message": "Running code critic (iteration 4)...",
389
+ "iteration": 4,
390
+ "prompt_details": {
391
+ "description": "Code quality analysis and critique (iteration 4)",
392
+ "conversation": [
393
+ {
394
+ "role": "system",
395
+ "content": "You will be given a question and a code solution and you must judge the quality of the code for solving the problem.\n \nLook for any of the following issues in the code:\n- The code should be input dependent, meaning it should use the input symbols to compute the answer. It is OK for the code to be specialized to the input (i.e. the reasoning itself may be hardcoded, like a decision tree where the branches are hardcoded).\n- The code should not return None unless \"None\" is the correct answer.\n- The code should return the answer, not just print it. If the question asks for a multiple choice answer, the code should return the choice as described in the question.\n- There should not be any example usage of the code.\n- If there is a simpler way to solve the problem, please describe it.\n- If there are any clear bugs in the code which impact the correctness of the answer, please describe them.\n- If there are any issues with the extracted symbols, please describe them as well, but separate these issues from the issues with the code.\n- If it is possible to sanity check the output of the code, please do so and describe if there are any obvious issues with the output and how the code could be fixed to avoid these issues.\n\n\n\n\nAfter analyzing the code in depth, output a concrete and concise summary of the issues that are present, do not include any code examples. Please order the issues by impact on answer correctness."
396
+ },
397
+ {
398
+ "role": "user",
399
+ "content": "Question: Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity\n\nThe following are extracted symbols from the question in JSON format followed by a Python program which takes the JSON as an argument called `symbols` and computes the answer.\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyz\"\n}\n```\n\n```python\n\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"] + \"x\" # Correctly place 'x' at the end\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n\n```\n\nCode execution result:\n```\nReturn value: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\nStandard output: \nExceptions: None\n```\n\nOutput a concrete and concise summary of only the issues that are present, do not include any code examples.\n"
400
+ }
401
+ ]
402
+ }
403
+ },
404
+ {
405
+ "step": "refinement",
406
+ "message": "Refining solution (iteration 4)...",
407
+ "iteration": 4,
408
+ "prompt_details": {
409
+ "description": "Solution refinement (iteration 4)",
410
+ "conversation": [
411
+ {
412
+ "role": "system",
413
+ "content": "You will be given a question and you must answer it by extracting relevant symbols in JSON format and then writing a Python program to calculate the final answer.\n\nYou MUST always plan extensively before outputting any symbols or code.\n\nYou MUST iterate and keep going until the problem is solved.\n\n# Workflow\n\n## Problem Solving Steps\n1. First extract relevant information from the input as JSON. Try to represent the relevant information in as much of a structured format as possible to help with further reasoning/processing.\n2. Using the information extracted, determine a reasonable approach to solving the problem using code, such that executing the code will return the final answer.\n3. Write a Python program to calculate and return the final answer. Use comments to explain the structure of the code and do not use a main() function.\nThe JSON must be enclosed in a markdown code block and the Python function must be in a separate markdown code block and be called `solve` and accept a single input called `symbols` representing the JSON information extracted. Do not include any `if __name__ == \"__main__\"` statement and you can assume the JSON will be loaded into the variable called `symbols` by the user.\nThe Python code should not just return the answer or perform all reasoning in comments and instead leverage the code itself to perform the reasoning.\nBe careful that the code returns the answer as expected by the question, for instance, if the question is multiple choice, the code must return the choice as described in the question.\nBe sure to always output a JSON code block and a Python code block.\nMake sure to follow these formatting requirements exactly.\n"
414
+ },
415
+ {
416
+ "role": "user",
417
+ "content": "Consider a new alphabet whose letters have the same order as the English alphabet, except that x is the last letter. Sort the following words with the new alphabet and separate them with comma: oversimplifications, consternate, administrations, collaborations, blest, buxton, manipulations, tariff, quizzes, sound, xerxes, institutionalization, proximity"
418
+ },
419
+ {
420
+ "role": "assistant",
421
+ "content": "To solve this problem, we need to sort the given list of words according to a modified version of the English alphabet where 'x' is the last letter. This means the order of the alphabet is: a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, y, z, x.\n\nLet's break down the steps:\n\n1. **Extract the relevant information**: We have a list of words and a modified alphabet order.\n2. **Plan the sorting**: We need to sort the words based on the new alphabet order. This involves creating a custom sorting function that uses the modified order.\n3. **Implement the solution**: Write a Python function to perform the sorting and return the sorted list as a comma-separated string.\n\nLet's start by extracting the relevant information in JSON format.\n\n```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwxyzx\"\n}\n```\n\nNow, let's write the Python function to sort the words according to the new alphabet order."
422
+ },
423
+ {
424
+ "role": "user",
425
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: name 'solve' is not defined\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code is missing entirely, which means it does not perform any operations to solve the problem. As a result, it does not sort the words according to the new alphabet order or return any output.\n\n2. The code should define a function or logic to sort the words based on the provided custom alphabet order and return the sorted list as a comma-separated string.\n\n3. The extracted symbols are correct, but they are not utilized in any way due to the absence of code. The `alphabet_order` and `words` need to be used to implement the sorting logic.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
426
+ },
427
+ {
428
+ "role": "assistant",
429
+ "content": "To address the issues, we need to implement a Python function that sorts the words based on the custom alphabet order where 'x' is the last letter. Here's the plan:\n\n1. **Create a mapping**: We will create a dictionary that maps each letter to its position in the new alphabet order.\n2. **Sort the words**: Use the custom mapping to sort the list of words.\n3. **Return the result**: Convert the sorted list into a comma-separated string and return it.\n\nLet's implement this in the `solve` function.\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```\n\nThis code defines a `solve` function that takes the extracted symbols, creates a custom order map, sorts the words based on this map, and returns the sorted words as a comma-separated string. This should resolve the issues and provide the correct output."
430
+ },
431
+ {
432
+ "role": "user",
433
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: None\nStandard output: \nExceptions: Error: string indices must be integers, not 'str'\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. The code attempts to extract words and a custom alphabet order from the `symbols` input, but the extracted symbols are empty, leading to a KeyError when trying to access `symbols[\"words\"]` and `symbols[\"alphabet_order\"]`.\n\n2. The code does not handle the input directly from the question, which means it cannot function correctly without the necessary input data being provided in the expected format.\n\n3. The code execution results in an error due to the incorrect handling of the input, specifically the absence of the required keys in the `symbols` dictionary.\n\n4. The code does not address the specific requirement of the problem, which is to sort the words based on a new alphabet where 'x' is the last letter, as it relies on an undefined `alphabet_order` from the input. \n\n5. The code returns `None` due to the exception, which is not the expected behavior for solving the problem. The function should return a sorted, comma-separated string of words.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
434
+ },
435
+ {
436
+ "role": "assistant",
437
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyzx\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"]\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```"
438
+ },
439
+ {
440
+ "role": "user",
441
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Incorrect Alphabet Order**: The custom alphabet order provided in the code is incorrect. The letter 'x' should be the last letter, but the order given is \"abcdefghijklmnopqrstuvwyzx\", which places 'z' after 'w' and before 'x'. The correct order should be \"abcdefghijklmnopqrstuvwyz\" with 'x' at the end.\n\n2. **Incorrect Sorting**: Due to the incorrect alphabet order, the sorting of words is not performed as intended according to the problem statement. This results in an incorrect order of words in the output.\n\n3. **Output Format**: The code correctly returns a comma-separated string of sorted words, which matches the required output format. However, due to the incorrect sorting, the output is not as expected.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
442
+ },
443
+ {
444
+ "role": "assistant",
445
+ "content": "```json\n{\n \"words\": [\n \"oversimplifications\",\n \"consternate\",\n \"administrations\",\n \"collaborations\",\n \"blest\",\n \"buxton\",\n \"manipulations\",\n \"tariff\",\n \"quizzes\",\n \"sound\",\n \"xerxes\",\n \"institutionalization\",\n \"proximity\"\n ],\n \"alphabet_order\": \"abcdefghijklmnopqrstuvwyz\"\n}\n```\n\n```python\ndef solve(symbols):\n # Extract words and custom alphabet order from symbols\n words = symbols[\"words\"]\n alphabet_order = symbols[\"alphabet_order\"] + \"x\" # Correctly place 'x' at the end\n \n # Create a mapping of each letter to its position in the custom alphabet order\n order_map = {char: index for index, char in enumerate(alphabet_order)}\n \n # Define a custom sorting key function\n def custom_sort_key(word):\n # Convert each word into a tuple of indices based on the custom alphabet order\n return [order_map[char] for char in word]\n \n # Sort the words using the custom sorting key\n sorted_words = sorted(words, key=custom_sort_key)\n \n # Join the sorted words into a comma-separated string\n result = \", \".join(sorted_words)\n \n return result\n```"
446
+ },
447
+ {
448
+ "role": "user",
449
+ "content": "Please fix the issues with the code and symbols or output \"FINISHED\".\nThe following is the result of evaluating the above code with the extracted symbols.\n```\nReturn value: administrations, blest, buxton, collaborations, consternate, institutionalization, manipulations, oversimplifications, proximity, quizzes, sound, tariff, xerxes\nStandard output: \nExceptions: None\n```\n\nThe following is the summary of issues found with the code or the extracted symbols by another model:\n```\n1. **Incorrect Alphabet Order**: The code incorrectly appends 'x' to the end of the `alphabet_order` string, which already includes 'x' in its correct position. This results in an incorrect order where 'x' is considered twice, potentially causing incorrect sorting behavior.\n\n2. **Incorrect Sorting**: Due to the incorrect alphabet order, the sorting of words is not performed according to the specified new alphabet where 'x' should be the last letter. This impacts the correctness of the answer.\n\n3. **Output Format**: The code correctly returns a comma-separated string of sorted words, which matches the required output format. However, due to the incorrect sorting, the output is not as expected.\n```\n\nIf there are any issues which impact the correctness of the answer, please output code which does not have the issues. Before outputting any code, plan how the code will solve the problem and avoid the issues.\nIf stuck, try outputting different code to solve the problem in a different way.\nYou may also revise the extracted symbols. To do this, output the revised symbols in a JSON code block. Only include information in the JSON which is present in the original input to keep the code grounded in the specific problem. Some examples of symbol revisions are changing the names of certain symbols, providing further granularity, and adding information which was originally missed.\nIf everything is correct, output the word \"FINISHED\" and nothing else.\n"
450
+ }
451
+ ]
452
+ }
453
+ },
454
+ {
455
+ "step": "finished",
456
+ "message": "Solution completed successfully!",
457
+ "iteration": null,
458
+ "prompt_details": null
459
+ }
460
+ ],
461
+ "timestamp": 1760565821.8149712
462
+ }
src/pips/static/README.md ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIPS Frontend Modularization
2
+
3
+ This directory contains the refactored, modular version of the PIPS (Per-Instance Program Synthesis) frontend application.
4
+
5
+ ## 📁 Structure Overview
6
+
7
+ ```
8
+ pips/static/
9
+ ├── css/
10
+ │ ├── main.css # Main CSS entry point (imports all modules)
11
+ │ ├── tokens.css # Design tokens (colors, spacing, etc.)
12
+ │ ├── base.css # Global resets and typography
13
+ │ └── components/
14
+ │ ├── panels.css # Left/right panel layouts
15
+ │ ├── forms.css # Form elements and inputs
16
+ │ ├── buttons.css # Button components
17
+ │ ├── chat.css # Chat area and message styles
18
+ │ ├── sessions.css # Session management UI
19
+ │ ├── modal.css # Modal dialogs
20
+ │ ├── utilities.css # Utility classes and animations
21
+ │ └── responsive.css # Media queries for mobile
22
+ ├── js/
23
+ │ ├── main.js # Application bootstrap
24
+ │ ├── core/
25
+ │ │ ├── logger.js # Debug logging utility
26
+ │ │ ├── state.js # Application state management
27
+ │ │ └── storage.js # localStorage utilities
28
+ │ └── network/
29
+ │ └── socket.js # Socket.IO connection management
30
+ └── README.md # This file
31
+ ```
32
+
33
+ ## 🔄 Migration from Monolithic
34
+
35
+ ### Before (index.html)
36
+ - **~4000 lines** in single file
37
+ - Inline `<style>` block with ~1500 lines of CSS
38
+ - Inline `<script>` block with ~3500 lines of JavaScript
39
+ - All functionality tightly coupled
40
+ - Difficult to maintain and debug
41
+
42
+ ### After (Modular)
43
+ - **HTML template**: Clean markup without inline styles/scripts
44
+ - **CSS modules**: 8 focused stylesheets (~200-400 lines each)
45
+ - **JS modules**: ES6 modules with clear separation of concerns
46
+ - **Zero functional changes**: All UI/UX behavior preserved
47
+ - **Better maintainability**: Each module has single responsibility
48
+
49
+ ## 🚀 Features Preserved
50
+
51
+ ✅ **All original functionality maintained**:
52
+ - Socket.IO real-time communication
53
+ - Problem solving workflow
54
+ - Session management and persistence
55
+ - Settings modal with API key storage
56
+ - Image upload with drag & drop
57
+ - Responsive design for mobile
58
+ - Code syntax highlighting
59
+ - Progress indicators and status updates
60
+ - Chat history export
61
+
62
+ ## 🛠 Development Guide
63
+
64
+ ### Using the Modular Version
65
+
66
+ 1. **Replace the template**: Use `index_modular.html` instead of `index.html`
67
+ 2. **CSS is automatically loaded**: `main.css` imports all component stylesheets
68
+ 3. **JS modules load automatically**: ES6 modules with proper imports
69
+
70
+ ### CSS Architecture
71
+
72
+ - **Tokens first**: All colors, spacing, and design tokens in `tokens.css`
73
+ - **Component-based**: Each UI component has its own stylesheet
74
+ - **BEM-like naming**: Clear, descriptive class names
75
+ - **Mobile-first responsive**: Media queries in `responsive.css`
76
+
77
+ ### JavaScript Architecture
78
+
79
+ - **ES6 modules**: Clean imports/exports
80
+ - **Event-driven**: State changes emit events for loose coupling
81
+ - **Error handling**: Global error boundary with detailed logging
82
+ - **Singleton patterns**: Shared instances for state, storage, socket
83
+
84
+ ### Adding New Features
85
+
86
+ 1. **CSS**: Add new component files in `css/components/`
87
+ 2. **JS**: Create feature modules and import in `main.js`
88
+ 3. **Update imports**: Add to `main.css` and import in relevant JS files
89
+
90
+ ## 🧪 Testing
91
+
92
+ The modular version maintains 100% functional compatibility:
93
+
94
+ - **Visual regression**: All styles render identically
95
+ - **Behavioral compatibility**: All interactions work the same
96
+ - **API compatibility**: Same Socket.IO events and data flow
97
+ - **Storage compatibility**: Same localStorage keys and data formats
98
+
99
+ ## 📊 Benefits Achieved
100
+
101
+ | Metric | Before | After | Improvement |
102
+ |--------|--------|-------|-------------|
103
+ | **Maintainability** | Single 4000-line file | 8 CSS + 5 JS modules | 🎯 **Huge** |
104
+ | **Debuggability** | Global scope pollution | Modular namespaces | 🎯 **Much better** |
105
+ | **Team collaboration** | Merge conflicts frequent | Parallel development | 🎯 **Greatly improved** |
106
+ | **Code reusability** | Copy-paste only | Import/export modules | 🎯 **Full reusability** |
107
+ | **Bundle size** | Same | Same | ✅ **No change** |
108
+ | **Performance** | Same | Same | ✅ **No change** |
109
+ | **Functionality** | All features | All features | ✅ **Zero regressions** |
110
+
111
+ ## 🔧 Browser Support
112
+
113
+ - **Modern ES6 support required** for modules
114
+ - **Fallback**: Original `index.html` works in older browsers
115
+ - **Progressive enhancement**: Feather icons degrade gracefully
116
+
117
+ ## 🐛 Debugging
118
+
119
+ ### Enable Debug Logging
120
+ ```javascript
121
+ // In browser console
122
+ window.pipsApp.logger.debug('Component', 'Debug message', data);
123
+ ```
124
+
125
+ ### State Inspection
126
+ ```javascript
127
+ // Check current application state
128
+ console.log(window.pipsApp.state.getSnapshot());
129
+ ```
130
+
131
+ ### Network Debugging
132
+ ```javascript
133
+ // Check socket connection
134
+ console.log(window.pipsApp.socketManager.isConnected());
135
+ ```
136
+
137
+ ## 🚀 Future Enhancements
138
+
139
+ This modular foundation enables:
140
+
141
+ - **Unit testing**: Individual modules can be tested in isolation
142
+ - **Bundle optimization**: Tree-shaking and code splitting
143
+ - **TypeScript migration**: Easy to add type definitions
144
+ - **Component documentation**: Auto-generated docs from modules
145
+ - **Hot module replacement**: Development workflow improvements
146
+ - **Feature flags**: Conditional module loading
147
+
148
+ ## 📝 Files Modified
149
+
150
+ ### New Files Created
151
+ - `pips/static/css/main.css` - Main CSS entry point
152
+ - `pips/static/css/tokens.css` - Design system tokens
153
+ - `pips/static/css/base.css` - Global styles
154
+ - `pips/static/css/components/*.css` - Component stylesheets (8 files)
155
+ - `pips/static/js/main.js` - Application bootstrap
156
+ - `pips/static/js/core/*.js` - Core utilities (3 files)
157
+ - `pips/static/js/network/socket.js` - Socket management
158
+ - `pips/templates/index_modular.html` - Clean template
159
+
160
+ ### Preserved Files
161
+ - `pips/templates/index.html` - Original monolithic file (unchanged)
162
+
163
+ This modularization provides a solid foundation for maintaining and extending the PIPS application while preserving all existing functionality.
src/pips/static/css/base.css ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Base Reset and Typography */
2
+ * {
3
+ margin: 0;
4
+ padding: 0;
5
+ box-sizing: border-box;
6
+ }
7
+
8
+ body {
9
+ font-family: 'Poppins', 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
10
+ background: linear-gradient(135deg, var(--primary-50) 0%, var(--gray-50) 100%);
11
+ height: 100vh;
12
+ overflow: hidden;
13
+ color: var(--gray-900);
14
+ font-size: 14px;
15
+ line-height: 1.5;
16
+ font-weight: 400;
17
+ }
18
+
19
+ /* Global container and layout */
20
+ .container {
21
+ display: flex;
22
+ height: 100vh;
23
+ background: white;
24
+ box-shadow: var(--shadow-xl);
25
+ max-width: 100vw;
26
+ overflow-x: hidden;
27
+ }
28
+
29
+ /* Force word breaking for very long text */
30
+ .container {
31
+ max-width: 100vw;
32
+ overflow-x: hidden;
33
+
34
+ /* Primary colors with opacity variants */
35
+ --primary-color: var(--primary-500);
36
+ --primary-darker: var(--primary-600);
37
+ --primary-color-20: rgba(14, 165, 233, 0.2);
38
+
39
+ /* Surface colors */
40
+ --surface: var(--gray-25);
41
+ --surface-elevated: rgba(255, 255, 255, 0.95);
42
+
43
+ /* Text colors */
44
+ --text-primary: var(--gray-900);
45
+ --text-secondary: var(--gray-600);
46
+ --text-muted: var(--gray-500);
47
+
48
+ /* Border colors */
49
+ --border-color: var(--gray-200);
50
+ --border-hover: var(--gray-300);
51
+ }
src/pips/static/css/components/buttons.css ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Button Components */
2
+ .button-group {
3
+ display: flex;
4
+ gap: 16px;
5
+ margin-top: auto;
6
+ padding-top: 8px;
7
+ }
8
+
9
+ .btn {
10
+ display: inline-flex;
11
+ align-items: center;
12
+ justify-content: center;
13
+ gap: 10px;
14
+ padding: 16px 24px;
15
+ border: none;
16
+ border-radius: 12px;
17
+ font-size: 15px;
18
+ font-weight: 600;
19
+ cursor: pointer;
20
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
21
+ text-decoration: none;
22
+ font-family: 'Poppins', 'Inter', sans-serif;
23
+ letter-spacing: 0.01em;
24
+ position: relative;
25
+ overflow: hidden;
26
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
27
+ }
28
+
29
+ .btn::before {
30
+ content: '';
31
+ position: absolute;
32
+ top: 0;
33
+ left: -100%;
34
+ width: 100%;
35
+ height: 100%;
36
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
37
+ transition: left 0.5s;
38
+ }
39
+
40
+ .btn:hover::before {
41
+ left: 100%;
42
+ }
43
+
44
+ .btn-primary {
45
+ background: linear-gradient(135deg, var(--primary-600) 0%, var(--primary-700) 100%);
46
+ color: white;
47
+ flex: 1;
48
+ box-shadow: 0 4px 14px rgba(14, 165, 233, 0.3);
49
+ }
50
+
51
+ .btn-primary:hover:not(:disabled) {
52
+ background: linear-gradient(135deg, var(--primary-700) 0%, var(--primary-800) 100%);
53
+ transform: translateY(-2px);
54
+ box-shadow: 0 6px 20px rgba(14, 165, 233, 0.4);
55
+ }
56
+
57
+ .btn-secondary {
58
+ background: linear-gradient(135deg, white 0%, var(--gray-50) 100%);
59
+ color: var(--gray-700);
60
+ border: 2px solid var(--gray-200);
61
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.06);
62
+ }
63
+
64
+ .btn-secondary:hover:not(:disabled) {
65
+ background: linear-gradient(135deg, var(--gray-50) 0%, var(--gray-100) 100%);
66
+ border-color: var(--gray-300);
67
+ transform: translateY(-2px);
68
+ box-shadow: 0 6px 16px rgba(0, 0, 0, 0.1);
69
+ }
70
+
71
+ .btn-danger {
72
+ background: linear-gradient(135deg, var(--error-500) 0%, var(--error-600) 100%);
73
+ color: white;
74
+ box-shadow: 0 4px 14px rgba(239, 68, 68, 0.3);
75
+ }
76
+
77
+ .btn-danger:hover:not(:disabled) {
78
+ background: linear-gradient(135deg, var(--error-600) 0%, var(--error-700) 100%);
79
+ transform: translateY(-2px);
80
+ box-shadow: 0 6px 20px rgba(239, 68, 68, 0.4);
81
+ }
82
+
83
+ .btn:disabled {
84
+ opacity: 0.6;
85
+ cursor: not-allowed;
86
+ transform: none;
87
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.05);
88
+ }
89
+
90
+ .btn:disabled::before {
91
+ display: none;
92
+ }
93
+
94
+ .btn-session-action {
95
+ display: inline-flex;
96
+ align-items: center;
97
+ gap: 8px;
98
+ padding: 10px 14px;
99
+ background: var(--gray-100);
100
+ color: var(--gray-700);
101
+ border: 1px solid var(--gray-200);
102
+ border-radius: 8px;
103
+ font-size: 13px;
104
+ font-weight: 500;
105
+ cursor: pointer;
106
+ transition: all 0.2s ease;
107
+ text-decoration: none;
108
+ }
109
+
110
+ .btn-session-action:hover {
111
+ background: var(--gray-200);
112
+ border-color: var(--gray-300);
113
+ transform: translateY(-1px);
114
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.08);
115
+ }
116
+
117
+ .btn-session-action:active {
118
+ transform: translateY(0);
119
+ }
src/pips/static/css/components/chat.css ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Chat Components */
2
+ .chat-area {
3
+ flex: 1;
4
+ padding: 24px;
5
+ overflow-y: auto;
6
+ overflow-x: hidden;
7
+ background: var(--gray-50);
8
+ word-wrap: break-word;
9
+ overflow-wrap: break-word;
10
+ }
11
+
12
+ .chat-message {
13
+ margin-bottom: 24px;
14
+ animation: fadeInUp 0.3s ease;
15
+ max-width: 100%;
16
+ word-wrap: break-word;
17
+ overflow-wrap: break-word;
18
+ }
19
+
20
+ @keyframes fadeInUp {
21
+ from {
22
+ opacity: 0;
23
+ transform: translateY(12px);
24
+ }
25
+ to {
26
+ opacity: 1;
27
+ transform: translateY(0);
28
+ }
29
+ }
30
+
31
+ .message-header {
32
+ display: flex;
33
+ align-items: center;
34
+ gap: 12px;
35
+ margin-bottom: 12px;
36
+ }
37
+
38
+ .message-avatar {
39
+ width: 36px;
40
+ height: 36px;
41
+ border-radius: 50%;
42
+ display: flex;
43
+ align-items: center;
44
+ justify-content: center;
45
+ color: white;
46
+ font-size: 14px;
47
+ font-weight: 600;
48
+ flex-shrink: 0;
49
+ }
50
+
51
+ .avatar-pips {
52
+ background: linear-gradient(135deg, var(--primary-500), var(--primary-600));
53
+ }
54
+
55
+ .avatar-llm {
56
+ background: linear-gradient(135deg, var(--success-500), var(--success-600));
57
+ }
58
+
59
+ .avatar-system {
60
+ background: linear-gradient(135deg, var(--warning-500), var(--warning-600));
61
+ }
62
+
63
+ .avatar-reviewer {
64
+ background: linear-gradient(135deg, var(--primary-500), var(--primary-700));
65
+ }
66
+
67
+ .message-sender {
68
+ font-weight: 600;
69
+ color: var(--gray-900);
70
+ font-family: 'Poppins', 'Inter', sans-serif;
71
+ }
72
+
73
+ .message-content {
74
+ background: white;
75
+ padding: 20px;
76
+ border-radius: var(--radius-lg);
77
+ border: 1px solid var(--gray-200);
78
+ box-shadow: var(--shadow-sm);
79
+ line-height: 1.6;
80
+ max-width: 100%;
81
+ word-wrap: break-word;
82
+ overflow-wrap: break-word;
83
+ }
84
+
85
+ /* Markdown formatting within messages */
86
+ .message-content h1,
87
+ .message-content h2,
88
+ .message-content h3,
89
+ .message-content h4,
90
+ .message-content h5,
91
+ .message-content h6 {
92
+ margin-top: 1.5em;
93
+ margin-bottom: 0.5em;
94
+ font-weight: 600;
95
+ color: var(--gray-900);
96
+ }
97
+
98
+ .message-content h1 { font-size: 1.5em; }
99
+ .message-content h2 { font-size: 1.3em; }
100
+ .message-content h3 { font-size: 1.2em; }
101
+ .message-content h4 { font-size: 1.1em; }
102
+
103
+ .message-content p {
104
+ margin: 8px 0;
105
+ word-wrap: break-word;
106
+ overflow-wrap: break-word;
107
+ }
108
+
109
+ .message-content pre {
110
+ max-width: 100%;
111
+ overflow-x: auto;
112
+ white-space: pre-wrap;
113
+ word-wrap: break-word;
114
+ }
115
+
116
+ .message-content ol,
117
+ .message-content ul {
118
+ margin-bottom: 1em;
119
+ padding-left: 1.5em;
120
+ }
121
+
122
+ .message-content li {
123
+ margin-bottom: 0.5em;
124
+ }
125
+
126
+ .message-content strong {
127
+ font-weight: 600;
128
+ color: var(--gray-900);
129
+ }
130
+
131
+ .message-content em {
132
+ font-style: italic;
133
+ }
134
+
135
+ .message-content blockquote {
136
+ border-left: 4px solid var(--primary-500);
137
+ padding-left: 1em;
138
+ margin: 1em 0;
139
+ color: var(--gray-700);
140
+ font-style: italic;
141
+ }
142
+
143
+ .message-content table {
144
+ border-collapse: collapse;
145
+ width: 100%;
146
+ margin: 1em 0;
147
+ max-width: 100%;
148
+ table-layout: fixed;
149
+ }
150
+
151
+ .message-content th,
152
+ .message-content td {
153
+ border: 1px solid var(--gray-200);
154
+ padding: 0.5em;
155
+ text-align: left;
156
+ }
157
+
158
+ .message-content th {
159
+ background: var(--gray-50);
160
+ font-weight: 600;
161
+ }
162
+
163
+ .code-block {
164
+ background: var(--gray-900);
165
+ color: var(--gray-100);
166
+ padding: 16px;
167
+ border-radius: 8px;
168
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
169
+ font-size: 14px;
170
+ line-height: 1.4;
171
+ margin: 16px 0;
172
+ overflow-x: auto;
173
+ max-width: 100%;
174
+ white-space: pre-wrap;
175
+ word-wrap: break-word;
176
+ }
177
+
178
+ .execution-result {
179
+ background: var(--gray-50);
180
+ border: 1px solid var(--gray-200);
181
+ border-left: 4px solid var(--success-500);
182
+ padding: 16px;
183
+ margin: 16px 0;
184
+ border-radius: 0 var(--radius-md) var(--radius-md) 0;
185
+ font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace;
186
+ font-size: 13px;
187
+ }
188
+
189
+ .execution-result.error {
190
+ border-left-color: var(--error-500);
191
+ background: var(--error-50);
192
+ color: var(--error-800);
193
+ }
194
+
195
+ .final-answer {
196
+ background: linear-gradient(135deg, var(--success-500), var(--success-600));
197
+ color: white;
198
+ padding: 24px;
199
+ border-radius: var(--radius-xl);
200
+ font-size: 16px;
201
+ font-weight: 600;
202
+ text-align: center;
203
+ margin: 24px 0;
204
+ box-shadow: var(--shadow-lg);
205
+ }
206
+
207
+ .iteration-badge {
208
+ background: var(--primary-100);
209
+ color: var(--primary-700);
210
+ padding: 4px 8px;
211
+ border-radius: var(--radius-sm);
212
+ font-size: 12px;
213
+ font-weight: 500;
214
+ margin-left: auto;
215
+ }
216
+
217
+ /* Force word breaking for very long text */
218
+ .message-content,
219
+ .message-content *,
220
+ .code-block,
221
+ .code-block * {
222
+ word-break: break-word;
223
+ hyphens: auto;
224
+ }
225
+
226
+ /* Ensure tables and other wide elements stay within bounds */
227
+ .message-content table {
228
+ max-width: 100%;
229
+ table-layout: fixed;
230
+ }
231
+
232
+ .message-content img {
233
+ max-width: 100%;
234
+ height: auto;
235
+ }
236
+
237
+ /* Custom scrollbar */
238
+ .chat-area::-webkit-scrollbar {
239
+ width: 6px;
240
+ }
241
+
242
+ .chat-area::-webkit-scrollbar-track {
243
+ background: var(--gray-100);
244
+ }
245
+
246
+ .chat-area::-webkit-scrollbar-thumb {
247
+ background: var(--gray-300);
248
+ border-radius: 3px;
249
+ }
250
+
251
+ .chat-area::-webkit-scrollbar-thumb:hover {
252
+ background: var(--gray-400);
253
+ }
254
+
255
+ /* Scroll to bottom button */
256
+ .scroll-to-bottom-btn {
257
+ position: absolute;
258
+ bottom: 20px;
259
+ right: 20px;
260
+ z-index: 1000;
261
+
262
+ display: flex;
263
+ align-items: center;
264
+ gap: 8px;
265
+
266
+ background: var(--surface-elevated);
267
+ border: 1px solid var(--border-color);
268
+ border-radius: 24px;
269
+ padding: 12px 16px;
270
+
271
+ color: var(--text-secondary);
272
+ font-size: 14px;
273
+ font-weight: 500;
274
+
275
+ cursor: pointer;
276
+
277
+ /* Shadow and effects */
278
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
279
+ backdrop-filter: blur(8px);
280
+
281
+ /* Smooth transitions */
282
+ transition: all 0.2s ease;
283
+ transform: translateY(0);
284
+ opacity: 0;
285
+ visibility: hidden;
286
+ }
287
+
288
+ .scroll-to-bottom-btn.show {
289
+ opacity: 1;
290
+ visibility: visible;
291
+ transform: translateY(0);
292
+ }
293
+
294
+ .scroll-to-bottom-btn:hover {
295
+ background: var(--primary-color);
296
+ color: white;
297
+ transform: translateY(-2px);
298
+ box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2);
299
+ }
300
+
301
+ .scroll-to-bottom-btn:active {
302
+ transform: translateY(0);
303
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
304
+ }
305
+
306
+ /* New messages indicator */
307
+ .scroll-to-bottom-btn .new-messages-indicator {
308
+ font-size: 12px;
309
+ font-weight: 600;
310
+ white-space: nowrap;
311
+ }
312
+
313
+ .scroll-to-bottom-btn.has-new-messages {
314
+ background: var(--primary-color);
315
+ color: white;
316
+ animation: pulse-glow 2s infinite;
317
+ }
318
+
319
+ .scroll-to-bottom-btn.has-new-messages:hover {
320
+ background: var(--primary-darker);
321
+ }
322
+
323
+ @keyframes pulse-glow {
324
+ 0%, 100% {
325
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
326
+ }
327
+ 50% {
328
+ box-shadow: 0 4px 20px var(--primary-color-20), 0 0 20px var(--primary-color-20);
329
+ }
330
+ }
331
+
332
+ /* Responsive adjustments */
333
+ @media (max-width: 768px) {
334
+ .scroll-to-bottom-btn {
335
+ bottom: 15px;
336
+ right: 15px;
337
+ padding: 10px 14px;
338
+ font-size: 13px;
339
+ }
340
+
341
+ .scroll-to-bottom-btn .new-messages-indicator {
342
+ display: none !important; /* Hide text on mobile, just show icon */
343
+ }
344
+ }
src/pips/static/css/components/feedback.css ADDED
@@ -0,0 +1,1260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Interactive Feedback Sidebar Styles
3
+ *
4
+ * Compact, elegant sidebar for PIPS interactive mode feedback interface.
5
+ */
6
+
7
+ /* Interactive Feedback Sidebar Styles */
8
+
9
+ .feedback-sidebar {
10
+ position: fixed;
11
+ top: 0;
12
+ right: -400px; /* Start off-screen */
13
+ width: 380px;
14
+ height: 100vh;
15
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
16
+ color: white;
17
+ z-index: 10000;
18
+ transition: right 0.3s cubic-bezier(0.4, 0, 0.2, 1);
19
+ box-shadow: -5px 0 20px rgba(0, 0, 0, 0.3);
20
+ display: flex;
21
+ flex-direction: column;
22
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
23
+ overflow: hidden;
24
+ }
25
+
26
+ .feedback-sidebar.visible {
27
+ right: 0;
28
+ }
29
+
30
+ .feedback-sidebar.resizing {
31
+ transition: none;
32
+ user-select: none;
33
+ }
34
+
35
+ /* Resize Handle */
36
+ .feedback-resize-handle {
37
+ position: absolute;
38
+ left: -5px;
39
+ top: 0;
40
+ width: 10px;
41
+ height: 100%;
42
+ cursor: ew-resize;
43
+ background: rgba(255, 255, 255, 0.1);
44
+ border-left: 2px solid rgba(255, 255, 255, 0.2);
45
+ transition: background-color 0.2s ease;
46
+ z-index: 1;
47
+ }
48
+
49
+ .feedback-resize-handle:hover {
50
+ background: rgba(255, 255, 255, 0.2);
51
+ }
52
+
53
+ .feedback-resize-handle::before {
54
+ content: '';
55
+ position: absolute;
56
+ left: 50%;
57
+ top: 50%;
58
+ transform: translate(-50%, -50%);
59
+ width: 3px;
60
+ height: 30px;
61
+ background: rgba(255, 255, 255, 0.4);
62
+ border-radius: 2px;
63
+ }
64
+
65
+ /* Header */
66
+ .feedback-sidebar-header {
67
+ padding: 20px;
68
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
69
+ display: flex;
70
+ justify-content: space-between;
71
+ align-items: center;
72
+ background: rgba(255, 255, 255, 0.05);
73
+ backdrop-filter: blur(10px);
74
+ }
75
+
76
+ .feedback-title h4 {
77
+ margin: 0 0 5px 0;
78
+ font-size: 18px;
79
+ font-weight: 600;
80
+ }
81
+
82
+ .iteration-badge {
83
+ background: rgba(255, 255, 255, 0.2);
84
+ padding: 4px 8px;
85
+ border-radius: 12px;
86
+ font-size: 12px;
87
+ font-weight: 500;
88
+ }
89
+
90
+ .feedback-controls {
91
+ display: flex;
92
+ align-items: center;
93
+ gap: 8px;
94
+ }
95
+
96
+ .feedback-close {
97
+ background: none;
98
+ border: none;
99
+ color: white;
100
+ cursor: pointer;
101
+ padding: 8px;
102
+ border-radius: 6px;
103
+ transition: all 0.2s ease;
104
+ display: flex;
105
+ align-items: center;
106
+ justify-content: center;
107
+ width: 36px;
108
+ height: 36px;
109
+ }
110
+
111
+ .feedback-close:hover {
112
+ background: rgba(255, 255, 255, 0.1);
113
+ transform: scale(1.05);
114
+ }
115
+
116
+ .feedback-close:active {
117
+ transform: scale(0.95);
118
+ }
119
+
120
+ /* Content Area */
121
+ .feedback-sidebar-content {
122
+ flex: 1;
123
+ overflow-y: auto;
124
+ padding: 0;
125
+ scrollbar-width: thin;
126
+ scrollbar-color: rgba(255, 255, 255, 0.3) transparent;
127
+ }
128
+
129
+ .feedback-sidebar-content::-webkit-scrollbar {
130
+ width: 6px;
131
+ }
132
+
133
+ .feedback-sidebar-content::-webkit-scrollbar-track {
134
+ background: transparent;
135
+ }
136
+
137
+ .feedback-sidebar-content::-webkit-scrollbar-thumb {
138
+ background: rgba(255, 255, 255, 0.3);
139
+ border-radius: 3px;
140
+ }
141
+
142
+ /* Symbols Section */
143
+ .symbols-section {
144
+ padding: 20px;
145
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
146
+ }
147
+
148
+ .section-header {
149
+ display: flex;
150
+ justify-content: space-between;
151
+ align-items: center;
152
+ margin-bottom: 15px;
153
+ }
154
+
155
+ .section-header h5 {
156
+ margin: 0;
157
+ font-size: 14px;
158
+ font-weight: 600;
159
+ text-transform: uppercase;
160
+ letter-spacing: 0.5px;
161
+ opacity: 0.9;
162
+ }
163
+
164
+ .expand-symbols-btn {
165
+ background: rgba(255, 255, 255, 0.1);
166
+ border: none;
167
+ color: white;
168
+ padding: 6px;
169
+ border-radius: 4px;
170
+ cursor: pointer;
171
+ transition: background-color 0.2s ease;
172
+ }
173
+
174
+ .expand-symbols-btn:hover {
175
+ background: rgba(255, 255, 255, 0.2);
176
+ }
177
+
178
+ /* JSON Display */
179
+ .symbols-json-preview,
180
+ .selectable-json {
181
+ background: rgba(0, 0, 0, 0.2);
182
+ border-radius: 6px;
183
+ padding: 12px;
184
+ margin: 0;
185
+ font-family: 'Monaco', 'Menlo', monospace;
186
+ font-size: 11px;
187
+ line-height: 1.4;
188
+ color: #e1e1e1;
189
+ overflow-x: auto;
190
+ cursor: pointer;
191
+ transition: background-color 0.2s ease;
192
+ user-select: text;
193
+ }
194
+
195
+ .symbols-json-preview:hover,
196
+ .selectable-json:hover {
197
+ background: rgba(0, 0, 0, 0.3);
198
+ }
199
+
200
+ .symbols-json-preview {
201
+ max-height: 150px;
202
+ overflow-y: auto;
203
+ }
204
+
205
+ .no-symbols {
206
+ text-align: center;
207
+ opacity: 0.6;
208
+ font-style: italic;
209
+ margin: 0;
210
+ }
211
+
212
+ /* Code Preview Section */
213
+ .code-preview-section {
214
+ padding: 20px;
215
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
216
+ }
217
+
218
+ .expand-code-btn {
219
+ background: rgba(255, 255, 255, 0.1);
220
+ border: none;
221
+ color: white;
222
+ padding: 6px;
223
+ border-radius: 4px;
224
+ cursor: pointer;
225
+ transition: background-color 0.2s ease;
226
+ }
227
+
228
+ .expand-code-btn:hover {
229
+ background: rgba(255, 255, 255, 0.2);
230
+ }
231
+
232
+ .code-preview {
233
+ background: rgba(0, 0, 0, 0.2);
234
+ border-radius: 6px;
235
+ overflow: hidden;
236
+ }
237
+
238
+ .code-snippet {
239
+ margin: 0;
240
+ padding: 15px;
241
+ font-family: 'Monaco', 'Menlo', monospace;
242
+ font-size: 12px;
243
+ line-height: 1.4;
244
+ color: #e1e1e1;
245
+ background: transparent;
246
+ overflow-x: auto;
247
+ }
248
+
249
+ .hoverable-code {
250
+ cursor: pointer;
251
+ transition: background-color 0.2s ease;
252
+ }
253
+
254
+ .hoverable-code:hover {
255
+ background: rgba(255, 255, 255, 0.05) !important;
256
+ }
257
+
258
+ /* Critic Section */
259
+ .critic-section {
260
+ padding: 20px;
261
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
262
+ }
263
+
264
+ .critic-toggle {
265
+ position: relative;
266
+ display: inline-block;
267
+ width: 44px;
268
+ height: 24px;
269
+ }
270
+
271
+ .critic-toggle input {
272
+ opacity: 0;
273
+ width: 0;
274
+ height: 0;
275
+ }
276
+
277
+ .toggle-slider {
278
+ position: absolute;
279
+ cursor: pointer;
280
+ top: 0;
281
+ left: 0;
282
+ right: 0;
283
+ bottom: 0;
284
+ background-color: rgba(255, 255, 255, 0.3);
285
+ transition: 0.3s;
286
+ border-radius: 24px;
287
+ }
288
+
289
+ .toggle-slider:before {
290
+ position: absolute;
291
+ content: "";
292
+ height: 18px;
293
+ width: 18px;
294
+ left: 3px;
295
+ bottom: 3px;
296
+ background-color: white;
297
+ transition: 0.3s;
298
+ border-radius: 50%;
299
+ }
300
+
301
+ input:checked + .toggle-slider {
302
+ background-color: #4CAF50;
303
+ }
304
+
305
+ input:checked + .toggle-slider:before {
306
+ transform: translateX(20px);
307
+ }
308
+
309
+ .critic-summary {
310
+ margin-top: 15px;
311
+ }
312
+
313
+ .critic-summary-text {
314
+ margin: 0;
315
+ opacity: 0.9;
316
+ line-height: 1.5;
317
+ }
318
+
319
+ .no-issues {
320
+ margin: 0;
321
+ opacity: 0.7;
322
+ font-style: italic;
323
+ }
324
+
325
+ /* Feedback Cart (Shopping Cart Style) */
326
+ .feedback-cart {
327
+ padding: 20px;
328
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
329
+ max-height: 300px;
330
+ overflow-y: auto;
331
+ }
332
+
333
+ .cart-count {
334
+ background: rgba(255, 255, 255, 0.2);
335
+ padding: 2px 8px;
336
+ border-radius: 12px;
337
+ font-size: 11px;
338
+ font-weight: 500;
339
+ }
340
+
341
+ .cart-items {
342
+ margin-top: 15px;
343
+ }
344
+
345
+ .empty-cart {
346
+ text-align: center;
347
+ padding: 20px;
348
+ opacity: 0.7;
349
+ }
350
+
351
+ .empty-cart i {
352
+ font-size: 24px;
353
+ margin-bottom: 10px;
354
+ opacity: 0.5;
355
+ }
356
+
357
+ .empty-cart p {
358
+ margin: 0 0 5px 0;
359
+ font-size: 14px;
360
+ }
361
+
362
+ .empty-cart small {
363
+ font-size: 12px;
364
+ opacity: 0.8;
365
+ }
366
+
367
+ .cart-item {
368
+ background: rgba(255, 255, 255, 0.1);
369
+ border-radius: 8px;
370
+ padding: 12px;
371
+ margin-bottom: 10px;
372
+ border: 1px solid rgba(255, 255, 255, 0.1);
373
+ transition: all 0.2s ease;
374
+ }
375
+
376
+ .cart-item:hover {
377
+ background: rgba(255, 255, 255, 0.15);
378
+ border-color: rgba(255, 255, 255, 0.2);
379
+ }
380
+
381
+ .cart-item-header {
382
+ display: flex;
383
+ justify-content: space-between;
384
+ align-items: center;
385
+ margin-bottom: 8px;
386
+ }
387
+
388
+ .cart-item-type {
389
+ display: flex;
390
+ align-items: center;
391
+ gap: 6px;
392
+ font-size: 12px;
393
+ font-weight: 600;
394
+ }
395
+
396
+ .cart-item-type i {
397
+ width: 14px;
398
+ height: 14px;
399
+ }
400
+
401
+ .cart-item-type small {
402
+ opacity: 0.7;
403
+ font-weight: 400;
404
+ margin-left: 4px;
405
+ }
406
+
407
+ .cart-item-actions {
408
+ display: flex;
409
+ gap: 4px;
410
+ }
411
+
412
+ .cart-action {
413
+ background: rgba(255, 255, 255, 0.1);
414
+ border: none;
415
+ color: white;
416
+ padding: 4px;
417
+ border-radius: 4px;
418
+ cursor: pointer;
419
+ transition: background-color 0.2s ease;
420
+ width: 24px;
421
+ height: 24px;
422
+ display: flex;
423
+ align-items: center;
424
+ justify-content: center;
425
+ }
426
+
427
+ .cart-action:hover {
428
+ background: rgba(255, 255, 255, 0.2);
429
+ }
430
+
431
+ .cart-action.remove:hover {
432
+ background: rgba(255, 0, 0, 0.6);
433
+ }
434
+
435
+ .cart-action i {
436
+ width: 12px;
437
+ height: 12px;
438
+ }
439
+
440
+ .cart-item-preview {
441
+ background: rgba(0, 0, 0, 0.2);
442
+ padding: 8px;
443
+ border-radius: 4px;
444
+ font-family: 'Monaco', 'Menlo', monospace;
445
+ font-size: 11px;
446
+ margin-bottom: 8px;
447
+ color: #ffd700;
448
+ word-break: break-all;
449
+ }
450
+
451
+ .cart-item-comment {
452
+ font-size: 13px;
453
+ line-height: 1.4;
454
+ opacity: 0.9;
455
+ }
456
+
457
+ /* Quick Actions */
458
+ .quick-actions {
459
+ padding: 20px;
460
+ display: flex;
461
+ gap: 10px;
462
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
463
+ }
464
+
465
+ .action-btn {
466
+ flex: 1;
467
+ padding: 12px 16px;
468
+ border: none;
469
+ border-radius: 6px;
470
+ font-weight: 600;
471
+ font-size: 13px;
472
+ cursor: pointer;
473
+ transition: all 0.2s ease;
474
+ display: flex;
475
+ align-items: center;
476
+ justify-content: center;
477
+ gap: 6px;
478
+ }
479
+
480
+ .action-btn.primary {
481
+ background: #4CAF50;
482
+ color: white;
483
+ }
484
+
485
+ .action-btn.primary:hover {
486
+ background: #45a049;
487
+ transform: translateY(-1px);
488
+ }
489
+
490
+ .action-btn.secondary {
491
+ background: rgba(255, 255, 255, 0.1);
492
+ color: white;
493
+ border: 1px solid rgba(255, 255, 255, 0.2);
494
+ }
495
+
496
+ .action-btn.secondary:hover {
497
+ background: rgba(255, 255, 255, 0.2);
498
+ }
499
+
500
+ .action-btn.success {
501
+ background: #2196F3;
502
+ color: white;
503
+ }
504
+
505
+ .action-btn.success:hover {
506
+ background: #1976D2;
507
+ transform: translateY(-1px);
508
+ }
509
+
510
+ .action-btn.small {
511
+ padding: 8px 12px;
512
+ font-size: 12px;
513
+ }
514
+
515
+ .action-btn:disabled {
516
+ opacity: 0.5;
517
+ cursor: not-allowed;
518
+ transform: none !important;
519
+ }
520
+
521
+ /* Comments Section */
522
+ .comments-section {
523
+ padding: 20px;
524
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
525
+ }
526
+
527
+ .comments-section h5 {
528
+ margin: 0 0 15px 0;
529
+ font-size: 14px;
530
+ font-weight: 600;
531
+ text-transform: uppercase;
532
+ letter-spacing: 0.5px;
533
+ opacity: 0.9;
534
+ }
535
+
536
+ #user-comments {
537
+ width: 100%;
538
+ background: rgba(255, 255, 255, 0.1);
539
+ border: 1px solid rgba(255, 255, 255, 0.2);
540
+ border-radius: 6px;
541
+ padding: 12px;
542
+ color: white;
543
+ font-family: inherit;
544
+ font-size: 14px;
545
+ resize: vertical;
546
+ min-height: 80px;
547
+ }
548
+
549
+ #user-comments::placeholder {
550
+ color: rgba(255, 255, 255, 0.6);
551
+ }
552
+
553
+ #user-comments:focus {
554
+ outline: none;
555
+ border-color: rgba(255, 255, 255, 0.4);
556
+ background: rgba(255, 255, 255, 0.15);
557
+ }
558
+
559
+ .comment-actions {
560
+ margin-top: 10px;
561
+ display: flex;
562
+ gap: 8px;
563
+ }
564
+
565
+ /* Dialogue Boxes */
566
+ .dialogue-box {
567
+ position: absolute;
568
+ bottom: 20px;
569
+ left: 20px;
570
+ right: 20px;
571
+ background: rgba(0, 0, 0, 0.95);
572
+ border-radius: 12px;
573
+ border: 1px solid rgba(255, 255, 255, 0.2);
574
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
575
+ backdrop-filter: blur(10px);
576
+ z-index: 100;
577
+ }
578
+
579
+ .dialogue-header {
580
+ padding: 15px 20px;
581
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
582
+ display: flex;
583
+ justify-content: space-between;
584
+ align-items: center;
585
+ background: rgba(255, 255, 255, 0.05);
586
+ border-radius: 12px 12px 0 0;
587
+ }
588
+
589
+ .dialogue-header h6 {
590
+ margin: 0;
591
+ font-size: 14px;
592
+ font-weight: 600;
593
+ color: white;
594
+ }
595
+
596
+ .dialogue-close {
597
+ background: none;
598
+ border: none;
599
+ color: rgba(255, 255, 255, 0.7);
600
+ cursor: pointer;
601
+ font-size: 18px;
602
+ width: 24px;
603
+ height: 24px;
604
+ display: flex;
605
+ align-items: center;
606
+ justify-content: center;
607
+ border-radius: 4px;
608
+ transition: all 0.2s ease;
609
+ }
610
+
611
+ .dialogue-close:hover {
612
+ background: rgba(255, 255, 255, 0.1);
613
+ color: white;
614
+ }
615
+
616
+ .dialogue-content {
617
+ padding: 20px;
618
+ }
619
+
620
+ .highlighted-content {
621
+ margin-bottom: 15px;
622
+ }
623
+
624
+ .highlighted-content label {
625
+ display: block;
626
+ font-size: 12px;
627
+ font-weight: 600;
628
+ color: rgba(255, 255, 255, 0.8);
629
+ margin-bottom: 8px;
630
+ text-transform: uppercase;
631
+ letter-spacing: 0.5px;
632
+ }
633
+
634
+ .highlight-preview {
635
+ background: rgba(255, 255, 255, 0.1);
636
+ border-radius: 6px;
637
+ padding: 10px;
638
+ border: 1px solid rgba(255, 255, 255, 0.2);
639
+ max-height: 100px;
640
+ overflow-y: auto;
641
+ }
642
+
643
+ .highlight-preview pre {
644
+ margin: 0;
645
+ font-family: 'Monaco', 'Menlo', monospace;
646
+ font-size: 11px;
647
+ line-height: 1.4;
648
+ color: #ffd700;
649
+ white-space: pre-wrap;
650
+ word-break: break-word;
651
+ }
652
+
653
+ .feedback-input {
654
+ margin-bottom: 15px;
655
+ }
656
+
657
+ .feedback-input label {
658
+ display: block;
659
+ font-size: 12px;
660
+ font-weight: 600;
661
+ color: rgba(255, 255, 255, 0.8);
662
+ margin-bottom: 8px;
663
+ text-transform: uppercase;
664
+ letter-spacing: 0.5px;
665
+ }
666
+
667
+ .feedback-input textarea {
668
+ width: 100%;
669
+ background: rgba(255, 255, 255, 0.1);
670
+ border: 1px solid rgba(255, 255, 255, 0.2);
671
+ border-radius: 6px;
672
+ padding: 12px;
673
+ color: white;
674
+ font-family: inherit;
675
+ font-size: 13px;
676
+ resize: vertical;
677
+ min-height: 80px;
678
+ }
679
+
680
+ .feedback-input textarea::placeholder {
681
+ color: rgba(255, 255, 255, 0.5);
682
+ }
683
+
684
+ .feedback-input textarea:focus {
685
+ outline: none;
686
+ border-color: rgba(255, 255, 255, 0.4);
687
+ background: rgba(255, 255, 255, 0.15);
688
+ }
689
+
690
+ .dialogue-actions {
691
+ display: flex;
692
+ gap: 10px;
693
+ justify-content: flex-end;
694
+ }
695
+
696
+ .dialogue-btn {
697
+ padding: 10px 20px;
698
+ border: none;
699
+ border-radius: 6px;
700
+ font-size: 13px;
701
+ font-weight: 600;
702
+ cursor: pointer;
703
+ transition: all 0.2s ease;
704
+ }
705
+
706
+ .dialogue-btn.primary {
707
+ background: #4CAF50;
708
+ color: white;
709
+ }
710
+
711
+ .dialogue-btn.primary:hover {
712
+ background: #45a049;
713
+ transform: translateY(-1px);
714
+ }
715
+
716
+ .dialogue-btn.secondary {
717
+ background: rgba(255, 255, 255, 0.1);
718
+ color: white;
719
+ border: 1px solid rgba(255, 255, 255, 0.2);
720
+ }
721
+
722
+ .dialogue-btn.secondary:hover {
723
+ background: rgba(255, 255, 255, 0.2);
724
+ }
725
+
726
+ /* Symbols Modal */
727
+ .symbols-modal {
728
+ position: fixed;
729
+ top: 0;
730
+ left: 0;
731
+ width: 100%;
732
+ height: 100%;
733
+ background: rgba(0, 0, 0, 0.7);
734
+ display: flex;
735
+ align-items: center;
736
+ justify-content: center;
737
+ z-index: 10001;
738
+ backdrop-filter: blur(5px);
739
+ }
740
+
741
+ .symbols-modal-content {
742
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
743
+ color: white;
744
+ border-radius: 12px;
745
+ width: 90%;
746
+ max-width: 800px;
747
+ max-height: 80%;
748
+ overflow: hidden;
749
+ box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
750
+ position: relative;
751
+ }
752
+
753
+ .symbols-modal-header {
754
+ padding: 20px;
755
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
756
+ display: flex;
757
+ justify-content: space-between;
758
+ align-items: center;
759
+ background: rgba(255, 255, 255, 0.05);
760
+ }
761
+
762
+ .symbols-modal-header h4 {
763
+ margin: 0;
764
+ font-size: 18px;
765
+ font-weight: 600;
766
+ }
767
+
768
+ .modal-close {
769
+ background: none;
770
+ border: none;
771
+ color: white;
772
+ cursor: pointer;
773
+ padding: 8px;
774
+ border-radius: 6px;
775
+ transition: background-color 0.2s ease;
776
+ }
777
+
778
+ .modal-close:hover {
779
+ background: rgba(255, 255, 255, 0.1);
780
+ }
781
+
782
+ .symbols-modal-body {
783
+ padding: 20px;
784
+ overflow-y: auto;
785
+ max-height: 60vh;
786
+ position: relative;
787
+ }
788
+
789
+ .symbols-container {
790
+ margin-bottom: 20px;
791
+ }
792
+
793
+ .symbols-json {
794
+ background: rgba(0, 0, 0, 0.3);
795
+ border-radius: 8px;
796
+ padding: 20px;
797
+ margin: 0;
798
+ font-family: 'Monaco', 'Menlo', monospace;
799
+ font-size: 12px;
800
+ line-height: 1.5;
801
+ color: #e1e1e1;
802
+ white-space: pre-wrap;
803
+ word-break: break-word;
804
+ user-select: text;
805
+ cursor: text;
806
+ min-height: 200px;
807
+ }
808
+
809
+ .symbols-json::selection {
810
+ background: rgba(100, 149, 237, 0.3);
811
+ color: white;
812
+ }
813
+
814
+ .selection-info {
815
+ text-align: center;
816
+ opacity: 0.7;
817
+ font-size: 13px;
818
+ margin-bottom: 20px;
819
+ }
820
+
821
+ .selection-info p {
822
+ margin: 0;
823
+ }
824
+
825
+ /* Code Modal */
826
+ .code-modal {
827
+ position: fixed;
828
+ top: 0;
829
+ left: 0;
830
+ width: 100%;
831
+ height: 100%;
832
+ background: rgba(0, 0, 0, 0.7);
833
+ display: flex;
834
+ align-items: center;
835
+ justify-content: center;
836
+ z-index: 10001;
837
+ backdrop-filter: blur(5px);
838
+ }
839
+
840
+ .code-modal-content {
841
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
842
+ color: white;
843
+ border-radius: 12px;
844
+ width: 95%;
845
+ max-width: 1200px;
846
+ max-height: 90%;
847
+ overflow: hidden;
848
+ box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
849
+ position: relative;
850
+ }
851
+
852
+ .code-modal-header {
853
+ padding: 20px;
854
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
855
+ display: flex;
856
+ justify-content: space-between;
857
+ align-items: center;
858
+ background: rgba(255, 255, 255, 0.05);
859
+ }
860
+
861
+ .code-modal-header h4 {
862
+ margin: 0;
863
+ font-size: 18px;
864
+ font-weight: 600;
865
+ }
866
+
867
+ .code-modal-body {
868
+ padding: 20px;
869
+ overflow-y: auto;
870
+ max-height: 70vh;
871
+ position: relative;
872
+ }
873
+
874
+ .code-container {
875
+ display: flex;
876
+ background: rgba(0, 0, 0, 0.3);
877
+ border-radius: 8px;
878
+ overflow: hidden;
879
+ margin-bottom: 15px;
880
+ }
881
+
882
+ .code-gutter {
883
+ background: rgba(0, 0, 0, 0.2);
884
+ padding: 15px 10px;
885
+ border-right: 1px solid rgba(255, 255, 255, 0.1);
886
+ user-select: none;
887
+ }
888
+
889
+ .line-number {
890
+ font-family: 'Monaco', 'Menlo', monospace;
891
+ font-size: 12px;
892
+ line-height: 1.4;
893
+ color: rgba(255, 255, 255, 0.5);
894
+ text-align: right;
895
+ cursor: pointer;
896
+ padding: 0 5px;
897
+ transition: color 0.2s ease;
898
+ }
899
+
900
+ .line-number:hover {
901
+ color: rgba(255, 255, 255, 0.8);
902
+ background: rgba(255, 255, 255, 0.1);
903
+ }
904
+
905
+ .code-display {
906
+ flex: 1;
907
+ margin: 0;
908
+ padding: 15px;
909
+ font-family: 'Monaco', 'Menlo', monospace;
910
+ font-size: 12px;
911
+ line-height: 1.4;
912
+ color: #e1e1e1;
913
+ background: transparent;
914
+ overflow-x: auto;
915
+ user-select: text;
916
+ cursor: text;
917
+ }
918
+
919
+ .code-display::selection {
920
+ background: rgba(100, 149, 237, 0.3);
921
+ color: white;
922
+ }
923
+
924
+ /* Loading State */
925
+ .loading-state {
926
+ display: flex;
927
+ flex-direction: column;
928
+ align-items: center;
929
+ justify-content: center;
930
+ padding: 40px 20px;
931
+ text-align: center;
932
+ }
933
+
934
+ .loading-spinner {
935
+ width: 40px;
936
+ height: 40px;
937
+ border: 3px solid rgba(255, 255, 255, 0.3);
938
+ border-top: 3px solid white;
939
+ border-radius: 50%;
940
+ animation: spin 1s linear infinite;
941
+ margin-bottom: 15px;
942
+ }
943
+
944
+ @keyframes spin {
945
+ 0% { transform: rotate(0deg); }
946
+ 100% { transform: rotate(360deg); }
947
+ }
948
+
949
+ .loading-state p {
950
+ margin: 0;
951
+ opacity: 0.9;
952
+ }
953
+
954
+ /* Notifications */
955
+ .feedback-notification {
956
+ position: fixed;
957
+ top: 20px;
958
+ right: 20px;
959
+ background: rgba(0, 0, 0, 0.9);
960
+ color: white;
961
+ padding: 12px 20px;
962
+ border-radius: 6px;
963
+ z-index: 10003;
964
+ transform: translateX(100%);
965
+ transition: transform 0.3s ease;
966
+ font-size: 14px;
967
+ font-weight: 500;
968
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3);
969
+ }
970
+
971
+ .feedback-notification.visible {
972
+ transform: translateX(0);
973
+ }
974
+
975
+ /* Final Artifacts Compact */
976
+ .final-artifacts-compact {
977
+ background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
978
+ color: white;
979
+ border-radius: 8px;
980
+ margin: 15px 0;
981
+ overflow: hidden;
982
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
983
+ }
984
+
985
+ .artifacts-header {
986
+ padding: 15px 20px;
987
+ display: flex;
988
+ justify-content: space-between;
989
+ align-items: center;
990
+ background: rgba(255, 255, 255, 0.1);
991
+ }
992
+
993
+ .artifacts-header h4 {
994
+ margin: 0;
995
+ font-size: 16px;
996
+ font-weight: 600;
997
+ }
998
+
999
+ .artifacts-close {
1000
+ background: none;
1001
+ border: none;
1002
+ color: white;
1003
+ cursor: pointer;
1004
+ padding: 5px;
1005
+ border-radius: 4px;
1006
+ transition: background-color 0.2s ease;
1007
+ }
1008
+
1009
+ .artifacts-close:hover {
1010
+ background: rgba(255, 255, 255, 0.2);
1011
+ }
1012
+
1013
+ .artifacts-content {
1014
+ padding: 20px;
1015
+ }
1016
+
1017
+ .artifacts-summary p {
1018
+ margin: 0 0 15px 0;
1019
+ font-size: 14px;
1020
+ }
1021
+
1022
+ .view-details-btn {
1023
+ background: rgba(255, 255, 255, 0.2);
1024
+ border: none;
1025
+ color: white;
1026
+ padding: 8px 16px;
1027
+ border-radius: 4px;
1028
+ cursor: pointer;
1029
+ font-size: 13px;
1030
+ font-weight: 500;
1031
+ transition: background-color 0.2s ease;
1032
+ }
1033
+
1034
+ .view-details-btn:hover {
1035
+ background: rgba(255, 255, 255, 0.3);
1036
+ }
1037
+
1038
+ .artifacts-details {
1039
+ margin-top: 15px;
1040
+ padding-top: 15px;
1041
+ border-top: 1px solid rgba(255, 255, 255, 0.2);
1042
+ }
1043
+
1044
+ .artifact-section {
1045
+ margin-bottom: 20px;
1046
+ }
1047
+
1048
+ .artifact-section:last-child {
1049
+ margin-bottom: 0;
1050
+ }
1051
+
1052
+ .artifact-section h5 {
1053
+ margin: 0 0 10px 0;
1054
+ font-size: 14px;
1055
+ font-weight: 600;
1056
+ opacity: 0.9;
1057
+ }
1058
+
1059
+ .artifact-code,
1060
+ .artifact-json {
1061
+ background: rgba(0, 0, 0, 0.3);
1062
+ padding: 15px;
1063
+ border-radius: 6px;
1064
+ font-family: 'Monaco', 'Menlo', monospace;
1065
+ font-size: 12px;
1066
+ line-height: 1.4;
1067
+ color: #e1e1e1;
1068
+ margin: 0;
1069
+ overflow-x: auto;
1070
+ white-space: pre-wrap;
1071
+ word-break: break-word;
1072
+ }
1073
+
1074
+ /* Responsive Design */
1075
+ @media (max-width: 768px) {
1076
+ .feedback-sidebar {
1077
+ width: 100vw;
1078
+ right: -100vw;
1079
+ }
1080
+
1081
+ .feedback-sidebar.visible {
1082
+ right: 0;
1083
+ }
1084
+
1085
+ .symbols-modal-content,
1086
+ .code-modal-content {
1087
+ width: 95%;
1088
+ height: 90%;
1089
+ }
1090
+
1091
+ .quick-actions {
1092
+ flex-direction: column;
1093
+ }
1094
+
1095
+ .action-btn {
1096
+ width: 100%;
1097
+ }
1098
+
1099
+ .dialogue-box {
1100
+ left: 10px;
1101
+ right: 10px;
1102
+ bottom: 10px;
1103
+ }
1104
+
1105
+ .dialogue-actions {
1106
+ flex-direction: column;
1107
+ }
1108
+
1109
+ .dialogue-btn {
1110
+ width: 100%;
1111
+ }
1112
+ }
1113
+
1114
+ /* Dark mode support */
1115
+ @media (prefers-color-scheme: dark) {
1116
+ .feedback-sidebar {
1117
+ background: var(--gray-900);
1118
+ border-left-color: var(--gray-700);
1119
+ }
1120
+
1121
+ .code-modal-content {
1122
+ background: var(--gray-900);
1123
+ }
1124
+
1125
+ .selection-popup {
1126
+ background: var(--gray-800);
1127
+ border-color: var(--gray-600);
1128
+ }
1129
+
1130
+ .final-artifacts-compact {
1131
+ background: var(--gray-900);
1132
+ border-color: var(--gray-700);
1133
+ }
1134
+ }
1135
+
1136
+ /* Feedback Restore Banner Styles */
1137
+ .feedback-restore-container {
1138
+ margin: 20px 0;
1139
+ border-radius: 16px;
1140
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
1141
+ box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);
1142
+ overflow: hidden;
1143
+ border: 1px solid rgba(255, 255, 255, 0.1);
1144
+ backdrop-filter: blur(10px);
1145
+ animation: slideInFromRight 0.5s cubic-bezier(0.4, 0, 0.2, 1);
1146
+ }
1147
+
1148
+ @keyframes slideInFromRight {
1149
+ from {
1150
+ opacity: 0;
1151
+ transform: translateX(50px) scale(0.95);
1152
+ }
1153
+ to {
1154
+ opacity: 1;
1155
+ transform: translateX(0) scale(1);
1156
+ }
1157
+ }
1158
+
1159
+ .feedback-restore-banner {
1160
+ padding: 16px;
1161
+ color: white;
1162
+ position: relative;
1163
+ }
1164
+
1165
+ .feedback-restore-banner::before {
1166
+ content: '';
1167
+ position: absolute;
1168
+ top: 0;
1169
+ left: 0;
1170
+ right: 0;
1171
+ height: 2px;
1172
+ background: linear-gradient(90deg, #ffffff40, #ffffff80, #ffffff40);
1173
+ animation: shimmer 2s ease-in-out infinite;
1174
+ }
1175
+
1176
+ @keyframes shimmer {
1177
+ 0%, 100% { opacity: 0.4; }
1178
+ 50% { opacity: 0.8; }
1179
+ }
1180
+
1181
+ .restore-actions {
1182
+ display: flex;
1183
+ gap: 12px;
1184
+ flex-wrap: wrap;
1185
+ }
1186
+
1187
+ .btn-restore-feedback,
1188
+ .btn-terminate-session {
1189
+ padding: 10px 16px;
1190
+ border: none;
1191
+ border-radius: 8px;
1192
+ font-size: 13px;
1193
+ font-weight: 600;
1194
+ cursor: pointer;
1195
+ transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
1196
+ display: flex;
1197
+ align-items: center;
1198
+ gap: 6px;
1199
+ text-decoration: none;
1200
+ min-height: 36px;
1201
+ backdrop-filter: blur(10px);
1202
+ }
1203
+
1204
+ .btn-restore-feedback {
1205
+ background: rgba(255, 255, 255, 0.2);
1206
+ color: white;
1207
+ border: 1px solid rgba(255, 255, 255, 0.3);
1208
+ flex: 1;
1209
+ justify-content: center;
1210
+ }
1211
+
1212
+ .btn-restore-feedback:hover {
1213
+ background: rgba(255, 255, 255, 0.3);
1214
+ transform: translateY(-2px);
1215
+ box-shadow: 0 4px 16px rgba(0, 0, 0, 0.2);
1216
+ }
1217
+
1218
+ .btn-terminate-session {
1219
+ background: rgba(255, 255, 255, 0.1);
1220
+ color: rgba(255, 255, 255, 0.8);
1221
+ border: 1px solid rgba(255, 255, 255, 0.2);
1222
+ min-width: 120px;
1223
+ justify-content: center;
1224
+ }
1225
+
1226
+ .btn-terminate-session:hover {
1227
+ background: rgba(220, 38, 127, 0.6);
1228
+ color: white;
1229
+ transform: translateY(-1px);
1230
+ box-shadow: 0 4px 12px rgba(220, 38, 127, 0.3);
1231
+ }
1232
+
1233
+ .btn-restore-feedback i,
1234
+ .btn-terminate-session i {
1235
+ width: 16px;
1236
+ height: 16px;
1237
+ }
1238
+
1239
+ /* Responsive adjustments for restore banner */
1240
+ @media (max-width: 768px) {
1241
+ .feedback-restore-container {
1242
+ margin: 12px;
1243
+ border-radius: 10px;
1244
+ }
1245
+
1246
+ .feedback-restore-banner {
1247
+ padding: 12px;
1248
+ }
1249
+
1250
+ .restore-actions {
1251
+ flex-direction: column;
1252
+ gap: 8px;
1253
+ }
1254
+
1255
+ .btn-restore-feedback,
1256
+ .btn-terminate-session {
1257
+ width: 100%;
1258
+ min-width: auto;
1259
+ }
1260
+ }
src/pips/static/css/components/forms.css ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Form Components */
2
+ .form-group {
3
+ display: flex;
4
+ flex-direction: column;
5
+ margin-bottom: 20px;
6
+ }
7
+
8
+ .form-group:last-child {
9
+ margin-bottom: 0;
10
+ }
11
+
12
+ .form-label {
13
+ font-size: 15px;
14
+ font-weight: 600;
15
+ color: var(--gray-800);
16
+ letter-spacing: -0.01em;
17
+ margin-bottom: 8px;
18
+ }
19
+
20
+ .form-help {
21
+ font-size: 13px;
22
+ color: var(--gray-500);
23
+ margin-top: 6px;
24
+ margin-bottom: 0;
25
+ line-height: 1.4;
26
+ }
27
+
28
+ .form-help-inline {
29
+ font-size: 12px;
30
+ color: var(--gray-400);
31
+ font-weight: 400;
32
+ margin-left: 8px;
33
+ }
34
+
35
+ #questionInput {
36
+ width: 100%;
37
+ min-height: 140px;
38
+ padding: 20px;
39
+ border: 2px solid var(--gray-200);
40
+ border-radius: 16px;
41
+ font-size: 15px;
42
+ resize: vertical;
43
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
44
+ font-family: inherit;
45
+ background: white;
46
+ line-height: 1.6;
47
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.04);
48
+ }
49
+
50
+ #questionInput:focus {
51
+ outline: none;
52
+ border-color: var(--primary-500);
53
+ box-shadow: 0 0 0 4px rgba(14, 165, 233, 0.1), 0 4px 12px rgba(0, 0, 0, 0.08);
54
+ transform: translateY(-1px);
55
+ }
56
+
57
+ #questionInput:disabled {
58
+ background-color: var(--gray-50);
59
+ color: var(--gray-500);
60
+ cursor: not-allowed;
61
+ border-color: var(--gray-200);
62
+ }
63
+
64
+ #questionInput::placeholder {
65
+ color: var(--gray-400);
66
+ font-style: italic;
67
+ }
68
+
69
+ /* Custom Rules Input - Match Problem Description styling */
70
+ .custom-rules-input {
71
+ width: 100%;
72
+ min-height: 80px;
73
+ padding: 16px 20px;
74
+ border: 2px solid var(--gray-200);
75
+ border-radius: 16px;
76
+ font-size: 14px;
77
+ resize: vertical;
78
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
79
+ font-family: inherit;
80
+ background: white;
81
+ line-height: 1.5;
82
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.04);
83
+ }
84
+
85
+ .custom-rules-input:focus {
86
+ outline: none;
87
+ border-color: var(--primary-500);
88
+ box-shadow: 0 0 0 4px rgba(14, 165, 233, 0.1), 0 4px 12px rgba(0, 0, 0, 0.08);
89
+ transform: translateY(-1px);
90
+ }
91
+
92
+ .custom-rules-input::placeholder {
93
+ color: var(--gray-400);
94
+ font-style: italic;
95
+ }
96
+
97
+ /* iOS-style Switch */
98
+ .ios-switch-container {
99
+ display: flex;
100
+ flex-direction: column;
101
+ gap: 12px;
102
+ }
103
+
104
+ .ios-switch-wrapper {
105
+ display: flex;
106
+ align-items: center;
107
+ justify-content: center;
108
+ }
109
+
110
+ .ios-switch-input {
111
+ display: none;
112
+ }
113
+
114
+ .ios-switch-label {
115
+ position: relative;
116
+ display: inline-block;
117
+ width: 200px;
118
+ height: 44px;
119
+ cursor: pointer;
120
+ user-select: none;
121
+ }
122
+
123
+ .ios-switch-slider {
124
+ position: absolute;
125
+ top: 0;
126
+ left: 0;
127
+ right: 0;
128
+ bottom: 0;
129
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
130
+ border-radius: 22px;
131
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
132
+ display: flex;
133
+ align-items: center;
134
+ justify-content: space-between;
135
+ padding: 4px;
136
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
137
+ }
138
+
139
+ .ios-switch-slider::before {
140
+ content: '';
141
+ position: absolute;
142
+ height: 36px;
143
+ width: 96px;
144
+ left: 4px;
145
+ background: white;
146
+ border-radius: 18px;
147
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
148
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
149
+ z-index: 2;
150
+ }
151
+
152
+ .ios-switch-input:checked + .ios-switch-label .ios-switch-slider {
153
+ background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
154
+ }
155
+
156
+ .ios-switch-input:checked + .ios-switch-label .ios-switch-slider::before {
157
+ transform: translateX(96px);
158
+ }
159
+
160
+ .ios-switch-text {
161
+ position: relative;
162
+ z-index: 1;
163
+ font-size: 13px;
164
+ font-weight: 600;
165
+ color: white;
166
+ text-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
167
+ transition: all 0.3s ease;
168
+ display: flex;
169
+ align-items: center;
170
+ justify-content: center;
171
+ width: 96px;
172
+ height: 36px;
173
+ }
174
+
175
+ .ios-switch-text-left {
176
+ margin-left: 8px;
177
+ }
178
+
179
+ .ios-switch-text-right {
180
+ margin-right: 8px;
181
+ }
182
+
183
+ .ios-switch-input:checked + .ios-switch-label .ios-switch-text-left {
184
+ color: rgba(255, 255, 255, 0.6);
185
+ }
186
+
187
+ .ios-switch-input:not(:checked) + .ios-switch-label .ios-switch-text-right {
188
+ color: rgba(255, 255, 255, 0.6);
189
+ }
190
+
191
+ .mode-description {
192
+ text-align: center;
193
+ font-size: 14px;
194
+ color: var(--gray-600);
195
+ font-style: italic;
196
+ transition: all 0.3s ease;
197
+ }
198
+
199
+ .form-group label {
200
+ display: block;
201
+ margin-bottom: 8px; /* Consistent with .form-label */
202
+ font-weight: 500;
203
+ color: var(--gray-700);
204
+ font-size: 14px;
205
+ }
206
+
207
+ .form-group select,
208
+ .form-group input[type="text"],
209
+ .form-group input[type="password"],
210
+ .form-group input[type="number"] {
211
+ width: 100%;
212
+ padding: 12px;
213
+ border: 1px solid var(--gray-300);
214
+ border-radius: var(--radius-md);
215
+ font-size: 14px;
216
+ transition: all 0.2s ease;
217
+ font-family: inherit;
218
+ }
219
+
220
+ .form-group select:focus,
221
+ .form-group input:focus {
222
+ outline: none;
223
+ border-color: var(--primary-500);
224
+ box-shadow: 0 0 0 3px rgb(14 165 233 / 0.1);
225
+ }
226
+
227
+ .switch-group {
228
+ display: flex;
229
+ align-items: center;
230
+ gap: 12px;
231
+ }
232
+
233
+ .switch {
234
+ position: relative;
235
+ display: inline-block;
236
+ width: 44px;
237
+ height: 24px;
238
+ }
239
+
240
+ .switch input {
241
+ opacity: 0;
242
+ width: 0;
243
+ height: 0;
244
+ }
245
+
246
+ .slider {
247
+ position: absolute;
248
+ cursor: pointer;
249
+ top: 0;
250
+ left: 0;
251
+ right: 0;
252
+ bottom: 0;
253
+ background-color: var(--gray-300);
254
+ transition: .3s;
255
+ border-radius: 24px;
256
+ }
257
+
258
+ .slider:before {
259
+ position: absolute;
260
+ content: "";
261
+ height: 18px;
262
+ width: 18px;
263
+ left: 3px;
264
+ bottom: 3px;
265
+ background-color: white;
266
+ transition: .3s;
267
+ border-radius: 50%;
268
+ box-shadow: var(--shadow-sm);
269
+ }
270
+
271
+ input:checked + .slider {
272
+ background-color: var(--primary-500);
273
+ }
274
+
275
+ input:checked + .slider:before {
276
+ transform: translateX(20px);
277
+ }
278
+
279
+ .method-label {
280
+ font-weight: 500;
281
+ color: var(--gray-700);
282
+ font-family: 'Poppins', 'Inter', sans-serif;
283
+ }
src/pips/static/css/components/modal.css ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Modal Components */
2
+ .modal {
3
+ display: none;
4
+ position: fixed;
5
+ z-index: 1000;
6
+ left: 0;
7
+ top: 0;
8
+ width: 100%;
9
+ height: 100%;
10
+ background-color: rgba(0, 0, 0, 0.5);
11
+ backdrop-filter: blur(4px);
12
+ align-items: center;
13
+ justify-content: center;
14
+ }
15
+
16
+ /* Show the modal with flex when visible */
17
+ .modal[style*="block"] {
18
+ display: flex !important;
19
+ }
20
+
21
+ .modal-content {
22
+ background: white;
23
+ margin: 0;
24
+ padding: 0;
25
+ border-radius: var(--radius-xl);
26
+ width: min(90vw, 500px);
27
+ max-height: min(85vh, 600px);
28
+ box-shadow: var(--shadow-xl);
29
+ overflow: hidden;
30
+ display: flex;
31
+ flex-direction: column;
32
+ transform: scale(0.95);
33
+ opacity: 0;
34
+ transition: all 0.2s ease;
35
+ }
36
+
37
+ /* Animation when modal appears */
38
+ .modal[style*="block"] .modal-content {
39
+ transform: scale(1);
40
+ opacity: 1;
41
+ }
42
+
43
+ .modal-header {
44
+ padding: 24px 24px 0;
45
+ display: flex;
46
+ justify-content: space-between;
47
+ align-items: center;
48
+ flex-shrink: 0;
49
+ }
50
+
51
+ .modal-title {
52
+ font-size: 20px;
53
+ font-weight: 600;
54
+ color: var(--gray-900);
55
+ font-family: 'Poppins', 'Inter', sans-serif;
56
+ }
57
+
58
+ .close {
59
+ color: var(--gray-400);
60
+ font-size: 24px;
61
+ font-weight: normal;
62
+ cursor: pointer;
63
+ padding: 4px;
64
+ border-radius: var(--radius-sm);
65
+ transition: all 0.2s ease;
66
+ }
67
+
68
+ .close:hover {
69
+ color: var(--gray-600);
70
+ background: var(--gray-100);
71
+ }
72
+
73
+ .modal-body {
74
+ padding: 24px;
75
+ overflow-y: auto;
76
+ flex: 1;
77
+ min-height: 0;
78
+ }
79
+
80
+ .modal-body::-webkit-scrollbar {
81
+ width: 6px;
82
+ }
83
+
84
+ .modal-body::-webkit-scrollbar-track {
85
+ background: var(--gray-100);
86
+ }
87
+
88
+ .modal-body::-webkit-scrollbar-thumb {
89
+ background: var(--gray-300);
90
+ border-radius: 3px;
91
+ }
92
+
93
+ .modal-body::-webkit-scrollbar-thumb:hover {
94
+ background: var(--gray-400);
95
+ }
96
+
97
+ /* Settings Tabs */
98
+ .settings-tabs {
99
+ display: flex;
100
+ border-bottom: 1px solid var(--border-light);
101
+ margin-bottom: 24px;
102
+ gap: 0;
103
+ }
104
+
105
+ .tab-button {
106
+ background: none;
107
+ border: none;
108
+ padding: 12px 20px;
109
+ font-size: 14px;
110
+ font-weight: 500;
111
+ color: var(--gray-600);
112
+ cursor: pointer;
113
+ border-bottom: 2px solid transparent;
114
+ transition: all 0.2s ease;
115
+ position: relative;
116
+ }
117
+
118
+ .tab-button:hover {
119
+ color: var(--gray-800);
120
+ background: var(--gray-50);
121
+ }
122
+
123
+ .tab-button.active {
124
+ color: var(--primary-600);
125
+ border-bottom-color: var(--primary-600);
126
+ background: var(--primary-50);
127
+ }
128
+
129
+ .tab-button:first-child {
130
+ border-radius: var(--radius-sm) 0 0 0;
131
+ }
132
+
133
+ .tab-button:last-child {
134
+ border-radius: 0 var(--radius-sm) 0 0;
135
+ }
136
+
137
+ /* Tab Content */
138
+ .tab-content {
139
+ display: none;
140
+ }
141
+
142
+ .tab-content.active {
143
+ display: block;
144
+ animation: fadeIn 0.2s ease-in-out;
145
+ }
146
+
147
+ @keyframes fadeIn {
148
+ from {
149
+ opacity: 0;
150
+ transform: translateY(10px);
151
+ }
152
+ to {
153
+ opacity: 1;
154
+ transform: translateY(0);
155
+ }
156
+ }
157
+
158
+ /* Settings Form Submit Button */
159
+ #settingsForm button[type="submit"] {
160
+ margin-top: 32px;
161
+ }
162
+
163
+ /* Mobile responsiveness */
164
+ @media (max-width: 640px) {
165
+ .modal-content {
166
+ width: 95vw;
167
+ max-height: 90vh;
168
+ margin: 0 8px; /* Small margin to prevent edge touching */
169
+ }
170
+
171
+ .modal-header,
172
+ .modal-body {
173
+ padding: 16px;
174
+ }
175
+
176
+ .modal-title {
177
+ font-size: 18px;
178
+ }
179
+
180
+ .settings-tabs {
181
+ margin-bottom: 16px;
182
+ }
183
+
184
+ .tab-button {
185
+ padding: 10px 16px;
186
+ font-size: 13px;
187
+ }
188
+
189
+ /* Reduce margin on mobile */
190
+ #settingsForm button[type="submit"] {
191
+ margin-top: 24px;
192
+ }
193
+ }
src/pips/static/css/components/panels.css ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Panel Layout Components */
2
+ .left-panel {
3
+ width: min(480px, 40vw);
4
+ min-width: 320px;
5
+ max-width: 480px;
6
+ background: linear-gradient(135deg, #fafbfc 0%, #f8fafc 100%);
7
+ border-right: 1px solid var(--gray-200);
8
+ display: flex;
9
+ flex-direction: column;
10
+ box-shadow: 2px 0 10px rgba(0, 0, 0, 0.05);
11
+ }
12
+
13
+ .right-panel {
14
+ flex: 1;
15
+ min-width: 0;
16
+ background: white;
17
+ display: flex;
18
+ flex-direction: column;
19
+ }
20
+
21
+ .header {
22
+ padding: 24px 24px 20px;
23
+ background: linear-gradient(135deg, var(--primary-600) 0%, var(--primary-700) 100%);
24
+ color: white;
25
+ border-bottom: 1px solid var(--primary-700);
26
+ position: relative;
27
+ overflow: hidden;
28
+ flex-shrink: 0;
29
+ }
30
+
31
+ .header::before {
32
+ content: '';
33
+ position: absolute;
34
+ top: 0;
35
+ left: 0;
36
+ right: 0;
37
+ bottom: 0;
38
+ background: linear-gradient(135deg, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0.05) 100%);
39
+ pointer-events: none;
40
+ }
41
+
42
+ .header h1 {
43
+ font-size: 28px;
44
+ font-weight: 700;
45
+ margin-bottom: 8px;
46
+ letter-spacing: -0.02em;
47
+ text-shadow: 0 2px 4px rgba(0,0,0,0.1);
48
+ position: relative;
49
+ z-index: 1;
50
+ }
51
+
52
+ .header p {
53
+ opacity: 0.95;
54
+ font-size: 14px;
55
+ font-weight: 500;
56
+ letter-spacing: 0.01em;
57
+ position: relative;
58
+ z-index: 1;
59
+ margin-bottom: 0;
60
+ }
61
+
62
+ .input-section {
63
+ padding: 24px 20px;
64
+ flex: 1;
65
+ display: flex;
66
+ flex-direction: column;
67
+ gap: 20px;
68
+ background: linear-gradient(180deg, rgba(255,255,255,0.8) 0%, rgba(255,255,255,0.95) 100%);
69
+ overflow-y: auto;
70
+ }
71
+
72
+ .footer {
73
+ padding: 16px 24px;
74
+ background: white;
75
+ border-top: 1px solid var(--gray-200);
76
+ display: flex;
77
+ justify-content: space-between;
78
+ align-items: center;
79
+ }
80
+
81
+ .footer-info {
82
+ font-size: 13px;
83
+ color: var(--gray-500);
84
+ }
src/pips/static/css/components/responsive.css ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Responsive Design */
2
+ @media (max-width: 1200px) {
3
+ .left-panel {
4
+ width: min(420px, 45vw);
5
+ min-width: 300px;
6
+ }
7
+
8
+ .header {
9
+ padding: 20px 20px 16px;
10
+ }
11
+
12
+ .header h1 {
13
+ font-size: 26px;
14
+ margin-bottom: 6px;
15
+ }
16
+
17
+ .header p {
18
+ font-size: 13px;
19
+ }
20
+
21
+ .input-section {
22
+ padding: 20px 18px;
23
+ gap: 18px;
24
+ }
25
+ }
26
+
27
+ @media (max-width: 1024px) {
28
+ .container {
29
+ flex-direction: column;
30
+ }
31
+
32
+ .left-panel {
33
+ width: 100%;
34
+ height: 50vh;
35
+ min-width: unset;
36
+ max-width: unset;
37
+ }
38
+
39
+ .right-panel {
40
+ height: 50vh;
41
+ }
42
+
43
+ .header {
44
+ padding: 18px 16px 14px;
45
+ }
46
+
47
+ .header h1 {
48
+ font-size: 24px;
49
+ margin-bottom: 6px;
50
+ }
51
+
52
+ .header p {
53
+ font-size: 13px;
54
+ }
55
+
56
+ .input-section {
57
+ padding: 18px 16px;
58
+ gap: 16px;
59
+ }
60
+ }
61
+
62
+ @media (max-width: 768px) {
63
+ .left-panel {
64
+ height: 60vh;
65
+ }
66
+
67
+ .right-panel {
68
+ height: 40vh;
69
+ }
70
+
71
+ .header {
72
+ padding: 16px 14px 12px;
73
+ }
74
+
75
+ .header h1 {
76
+ font-size: 22px;
77
+ margin-bottom: 5px;
78
+ }
79
+
80
+ .header p {
81
+ font-size: 12px;
82
+ }
83
+
84
+ .input-section {
85
+ padding: 16px 14px;
86
+ gap: 14px;
87
+ }
88
+
89
+ #questionInput {
90
+ min-height: 100px;
91
+ padding: 14px;
92
+ }
93
+ }
94
+
95
+ @media (max-width: 600px) {
96
+ .header {
97
+ padding: 14px 12px 10px;
98
+ }
99
+
100
+ .header h1 {
101
+ font-size: 20px;
102
+ margin-bottom: 4px;
103
+ }
104
+
105
+ .header p {
106
+ font-size: 11px;
107
+ }
108
+
109
+ .input-section {
110
+ padding: 14px 12px;
111
+ gap: 12px;
112
+ }
113
+
114
+ .button-group {
115
+ flex-direction: column;
116
+ gap: 12px;
117
+ }
118
+
119
+ .btn {
120
+ padding: 14px 20px;
121
+ font-size: 14px;
122
+ }
123
+ }
src/pips/static/css/components/sessions.css ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Sessions Management Styles */
2
+ .sessions-header {
3
+ display: flex;
4
+ align-items: center;
5
+ justify-content: space-between;
6
+ cursor: pointer;
7
+ padding: 12px 16px;
8
+ background: linear-gradient(135deg, var(--gray-50) 0%, white 100%);
9
+ border: 2px solid var(--gray-200);
10
+ border-radius: 12px;
11
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
12
+ }
13
+
14
+ .sessions-header:hover {
15
+ border-color: var(--primary-300);
16
+ background: linear-gradient(135deg, var(--primary-50) 0%, white 100%);
17
+ transform: translateY(-1px);
18
+ box-shadow: 0 4px 12px rgba(14, 165, 233, 0.15);
19
+ }
20
+
21
+ .sessions-header .form-label {
22
+ margin: 0;
23
+ display: flex;
24
+ align-items: center;
25
+ font-weight: 600;
26
+ color: var(--gray-800);
27
+ }
28
+
29
+ .sessions-toggle {
30
+ background: none;
31
+ border: none;
32
+ cursor: pointer;
33
+ padding: 4px;
34
+ border-radius: 6px;
35
+ transition: all 0.2s ease;
36
+ color: var(--gray-600);
37
+ }
38
+
39
+ .sessions-toggle:hover {
40
+ background: var(--gray-100);
41
+ color: var(--primary-600);
42
+ }
43
+
44
+ .sessions-toggle.expanded {
45
+ transform: rotate(180deg);
46
+ }
47
+
48
+ .sessions-container {
49
+ max-height: 0;
50
+ overflow: hidden;
51
+ transition: max-height 0.3s cubic-bezier(0.4, 0, 0.2, 1);
52
+ background: white;
53
+ border-left: 2px solid var(--gray-200);
54
+ border-right: 2px solid var(--gray-200);
55
+ border-bottom: 2px solid var(--gray-200);
56
+ border-radius: 0 0 12px 12px;
57
+ margin-top: -2px;
58
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.04);
59
+ }
60
+
61
+ .sessions-container.expanded {
62
+ max-height: 450px;
63
+ border-color: var(--primary-300);
64
+ }
65
+
66
+ .sessions-list {
67
+ max-height: 350px;
68
+ overflow-y: auto;
69
+ padding: 16px;
70
+ }
71
+
72
+ .session-item {
73
+ display: flex;
74
+ align-items: center;
75
+ padding: 14px 16px;
76
+ border-radius: 10px;
77
+ cursor: pointer;
78
+ transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
79
+ margin-bottom: 8px;
80
+ border: 1px solid transparent;
81
+ position: relative;
82
+ min-height: 68px;
83
+ }
84
+
85
+ .session-item:hover {
86
+ background: var(--gray-50);
87
+ border-color: var(--gray-200);
88
+ transform: translateX(2px);
89
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
90
+ }
91
+
92
+ .session-item.current-session {
93
+ background: linear-gradient(135deg, var(--primary-50) 0%, var(--primary-25) 100%);
94
+ border-color: var(--primary-200);
95
+ }
96
+
97
+ .session-item.current-session:hover {
98
+ background: linear-gradient(135deg, var(--primary-100) 0%, var(--primary-50) 100%);
99
+ border-color: var(--primary-300);
100
+ }
101
+
102
+ .session-item.active-solving {
103
+ background: linear-gradient(135deg, var(--success-50) 0%, var(--success-25) 100%);
104
+ border-color: var(--success-200);
105
+ animation: pulse-session 2s ease-in-out infinite;
106
+ }
107
+
108
+ @keyframes pulse-session {
109
+ 0%, 100% { box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.4); }
110
+ 50% { box-shadow: 0 0 0 4px rgba(34, 197, 94, 0.1); }
111
+ }
112
+
113
+ .session-item.selected {
114
+ background: linear-gradient(135deg, var(--primary-500) 0%, var(--primary-600) 100%) !important;
115
+ color: white !important;
116
+ border: 2px solid var(--primary-700) !important;
117
+ box-shadow: 0 4px 16px rgba(14, 165, 233, 0.4) !important;
118
+ transform: scale(1.02) !important;
119
+ position: relative !important;
120
+ animation: selected-pulse 2s ease-in-out infinite alternate !important;
121
+ }
122
+
123
+ .session-item.selected .session-title,
124
+ .session-item.selected .session-meta {
125
+ color: white !important;
126
+ opacity: 1 !important;
127
+ visibility: visible !important;
128
+ display: block !important;
129
+ text-shadow: 0 1px 2px rgba(0, 0, 0, 0.2) !important;
130
+ }
131
+
132
+ .session-item.selected .session-icon {
133
+ background: rgba(255, 255, 255, 0.25) !important;
134
+ color: white !important;
135
+ border: 1px solid rgba(255, 255, 255, 0.3) !important;
136
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
137
+ }
138
+
139
+ .session-item.selected .status-dot {
140
+ background: white !important;
141
+ box-shadow: 0 0 0 2px var(--primary-600) !important;
142
+ }
143
+
144
+ .session-item.selected:hover {
145
+ background: linear-gradient(135deg, var(--primary-600) 0%, var(--primary-700) 100%) !important;
146
+ box-shadow: 0 6px 20px rgba(14, 165, 233, 0.5) !important;
147
+ transform: scale(1.03) translateX(2px) !important;
148
+ }
149
+
150
+ /* Additional visual enhancement for selected sessions */
151
+ .session-item.selected::before {
152
+ content: '';
153
+ position: absolute;
154
+ left: 0;
155
+ top: 0;
156
+ bottom: 0;
157
+ width: 4px;
158
+ background: white !important;
159
+ border-radius: 0 2px 2px 0;
160
+ box-shadow: 0 0 8px rgba(255, 255, 255, 0.5);
161
+ }
162
+
163
+ .session-item.selected {
164
+ position: relative;
165
+ animation: selected-pulse 2s ease-in-out infinite alternate;
166
+ }
167
+
168
+ @keyframes selected-pulse {
169
+ 0% { box-shadow: 0 4px 16px rgba(14, 165, 233, 0.4); }
170
+ 100% { box-shadow: 0 4px 16px rgba(14, 165, 233, 0.6); }
171
+ }
172
+
173
+ .session-icon {
174
+ width: 36px;
175
+ height: 36px;
176
+ background: var(--gray-100);
177
+ border-radius: 8px;
178
+ display: flex;
179
+ align-items: center;
180
+ justify-content: center;
181
+ margin-right: 12px;
182
+ transition: all 0.2s ease;
183
+ color: var(--gray-600);
184
+ flex-shrink: 0;
185
+ }
186
+
187
+ .session-item.current-session .session-icon {
188
+ background: var(--primary-100);
189
+ color: var(--primary-600);
190
+ }
191
+
192
+ .session-info {
193
+ flex: 1;
194
+ min-width: 0;
195
+ padding-right: 8px;
196
+ }
197
+
198
+ .session-title {
199
+ font-weight: 600;
200
+ font-size: 14px;
201
+ color: var(--gray-900);
202
+ margin-bottom: 4px;
203
+ line-height: 1.3;
204
+ display: -webkit-box;
205
+ -webkit-line-clamp: 2;
206
+ -webkit-box-orient: vertical;
207
+ overflow: hidden;
208
+ word-break: break-word;
209
+ }
210
+
211
+ .session-meta {
212
+ font-size: 12px;
213
+ color: var(--gray-500);
214
+ line-height: 1.3;
215
+ display: -webkit-box;
216
+ -webkit-line-clamp: 1;
217
+ -webkit-box-orient: vertical;
218
+ overflow: hidden;
219
+ word-break: break-word;
220
+ }
221
+
222
+ .session-status {
223
+ display: flex;
224
+ align-items: center;
225
+ justify-content: center;
226
+ margin-left: auto;
227
+ margin-right: 8px;
228
+ flex-shrink: 0;
229
+ }
230
+
231
+ .status-dot {
232
+ width: 10px;
233
+ height: 10px;
234
+ border-radius: 50%;
235
+ background: var(--gray-400);
236
+ }
237
+
238
+ .session-status.active .status-dot {
239
+ background: var(--success-500);
240
+ animation: pulse-dot 2s ease-in-out infinite;
241
+ }
242
+
243
+ .session-status.solving .status-dot {
244
+ background: var(--warning-500);
245
+ animation: pulse-dot 1s ease-in-out infinite;
246
+ }
247
+
248
+ .session-status.completed .status-dot {
249
+ background: var(--primary-500);
250
+ }
251
+
252
+ .session-status.interrupted .status-dot {
253
+ background: var(--error-500);
254
+ }
255
+
256
+ @keyframes pulse-dot {
257
+ 0%, 100% { opacity: 1; transform: scale(1); }
258
+ 50% { opacity: 0.7; transform: scale(1.3); }
259
+ }
260
+
261
+ /* Session management actions (bottom of sessions panel) */
262
+ .sessions-actions {
263
+ display: flex;
264
+ gap: 10px;
265
+ padding: 16px;
266
+ border-top: 1px solid var(--gray-200);
267
+ background: linear-gradient(135deg, var(--gray-25) 0%, var(--gray-50) 100%);
268
+ }
269
+
270
+ /* Session action buttons container */
271
+ .session-actions {
272
+ display: flex;
273
+ align-items: center;
274
+ gap: 4px;
275
+ opacity: 0;
276
+ transform: translateX(8px);
277
+ transition: all 0.25s cubic-bezier(0.4, 0, 0.2, 1);
278
+ margin-left: 8px;
279
+ flex-shrink: 0;
280
+ }
281
+
282
+ .session-item:hover .session-actions {
283
+ opacity: 1;
284
+ transform: translateX(0);
285
+ }
286
+
287
+ /* Always show actions on selected items */
288
+ .session-item.selected .session-actions {
289
+ opacity: 1;
290
+ transform: translateX(0);
291
+ }
292
+
293
+ /* Base styling for action buttons */
294
+ .session-download,
295
+ .session-delete {
296
+ transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
297
+ padding: 6px;
298
+ border-radius: 8px;
299
+ background: none;
300
+ border: none;
301
+ cursor: pointer;
302
+ flex-shrink: 0;
303
+ display: flex;
304
+ align-items: center;
305
+ justify-content: center;
306
+ width: 32px;
307
+ height: 32px;
308
+ position: relative;
309
+ overflow: hidden;
310
+ }
311
+
312
+ .session-download i,
313
+ .session-download svg,
314
+ .session-delete i,
315
+ .session-delete svg {
316
+ width: 14px !important;
317
+ height: 14px !important;
318
+ stroke-width: 2;
319
+ }
320
+
321
+ .session-download::before,
322
+ .session-delete::before {
323
+ content: '';
324
+ position: absolute;
325
+ top: 0;
326
+ left: 0;
327
+ right: 0;
328
+ bottom: 0;
329
+ border-radius: inherit;
330
+ background: currentColor;
331
+ opacity: 0;
332
+ transition: opacity 0.2s ease;
333
+ z-index: -1;
334
+ }
335
+
336
+ /* Download button styling */
337
+ .session-download {
338
+ color: var(--primary-500);
339
+ }
340
+
341
+ .session-download:hover {
342
+ color: var(--primary-600);
343
+ transform: translateY(-1px);
344
+ box-shadow: 0 2px 8px rgba(14, 165, 233, 0.25);
345
+ }
346
+
347
+ .session-download:hover::before {
348
+ opacity: 0.1;
349
+ }
350
+
351
+ .session-download:active {
352
+ transform: translateY(0);
353
+ box-shadow: 0 1px 4px rgba(14, 165, 233, 0.3);
354
+ }
355
+
356
+ /* Delete button styling */
357
+ .session-delete {
358
+ color: var(--error-500);
359
+ }
360
+
361
+ .session-delete:hover {
362
+ color: var(--error-600);
363
+ transform: translateY(-1px);
364
+ box-shadow: 0 2px 8px rgba(239, 68, 68, 0.25);
365
+ }
366
+
367
+ .session-delete:hover::before {
368
+ opacity: 0.1;
369
+ }
370
+
371
+ .session-delete:active {
372
+ transform: translateY(0);
373
+ box-shadow: 0 1px 4px rgba(239, 68, 68, 0.3);
374
+ }
375
+
376
+ /* Selected session button styling */
377
+ .session-item.selected .session-actions {
378
+ opacity: 1;
379
+ }
380
+
381
+ .session-item.selected .session-download,
382
+ .session-item.selected .session-delete {
383
+ color: rgba(255, 255, 255, 0.9);
384
+ background: rgba(255, 255, 255, 0.1);
385
+ border: 1px solid rgba(255, 255, 255, 0.2);
386
+ }
387
+
388
+ .session-item.selected .session-download:hover,
389
+ .session-item.selected .session-delete:hover {
390
+ color: white;
391
+ background: rgba(255, 255, 255, 0.2);
392
+ border-color: rgba(255, 255, 255, 0.3);
393
+ transform: translateY(-1px) scale(1.05);
394
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
395
+ }
396
+
397
+ .session-item.selected .session-download:hover::before,
398
+ .session-item.selected .session-delete:hover::before {
399
+ opacity: 0.15;
400
+ }
401
+
402
+ /* Responsive adjustments */
403
+ @media (max-width: 768px) {
404
+ .session-download,
405
+ .session-delete {
406
+ width: 28px;
407
+ height: 28px;
408
+ padding: 4px;
409
+ }
410
+
411
+ .session-actions {
412
+ gap: 2px;
413
+ margin-left: 4px;
414
+ }
415
+ }
416
+
417
+ .session-item.completed-session {
418
+ background: linear-gradient(135deg, var(--success-25) 0%, var(--success-50) 100%);
419
+ border-color: var(--success-200);
420
+ }
421
+
422
+ .session-item.completed-session:hover {
423
+ background: linear-gradient(135deg, var(--success-50) 0%, var(--success-100) 100%);
424
+ border-color: var(--success-300);
425
+ }
426
+
427
+ .session-item.completed-session .session-icon {
428
+ background: var(--success-100);
429
+ color: var(--success-700);
430
+ }
431
+
432
+ .session-item.interrupted-session {
433
+ background: linear-gradient(135deg, var(--warning-25) 0%, var(--warning-50) 100%);
434
+ border-color: var(--warning-200);
435
+ }
436
+
437
+ .session-item.interrupted-session:hover {
438
+ background: linear-gradient(135deg, var(--warning-50) 0%, var(--warning-100) 100%);
439
+ border-color: var(--warning-300);
440
+ }
441
+
442
+ .session-item.interrupted-session .session-icon {
443
+ background: var(--warning-100);
444
+ color: var(--warning-700);
445
+ }
446
+
447
+ /* Add styling and animation for solving sessions */
448
+ .session-item.solving-session {
449
+ background: linear-gradient(135deg, var(--primary-25) 0%, var(--primary-50) 100%);
450
+ border-color: var(--primary-200);
451
+ }
452
+
453
+ .session-item.solving-session:hover {
454
+ background: linear-gradient(135deg, var(--primary-50) 0%, var(--primary-100) 100%);
455
+ border-color: var(--primary-300);
456
+ }
457
+
458
+ .session-item.solving-session .session-icon {
459
+ background: var(--primary-100);
460
+ color: var(--primary-700);
461
+ }
462
+
463
+ .session-item.solving-session .session-icon i,
464
+ .session-item.solving-session .session-icon svg {
465
+ animation: spin 0.8s linear infinite;
466
+ }
467
+
468
+ /* Custom scrollbar for sessions list */
469
+ .sessions-list::-webkit-scrollbar {
470
+ width: 4px;
471
+ }
472
+
473
+ .sessions-list::-webkit-scrollbar-track {
474
+ background: var(--gray-100);
475
+ border-radius: 2px;
476
+ }
477
+
478
+ .sessions-list::-webkit-scrollbar-thumb {
479
+ background: var(--gray-300);
480
+ border-radius: 2px;
481
+ }
482
+
483
+ .sessions-list::-webkit-scrollbar-thumb:hover {
484
+ background: var(--gray-400);
485
+ }
486
+
487
+ /* Sessions help text improvement */
488
+ .form-group .form-help {
489
+ margin-top: 8px;
490
+ font-size: 12px;
491
+ color: var(--gray-500);
492
+ line-height: 1.4;
493
+ padding-left: 4px;
494
+ }
495
+
496
+ /* Read-only session message */
497
+ .session-readonly-message {
498
+ display: flex;
499
+ align-items: center;
500
+ justify-content: center;
501
+ background: var(--warning-50);
502
+ border: 1px solid var(--warning-200);
503
+ border-radius: 8px;
504
+ padding: 12px;
505
+ margin-top: 8px;
506
+ font-size: 13px;
507
+ color: var(--warning-700);
508
+ text-align: center;
509
+ transition: all 0.3s ease;
510
+ box-shadow: 0 2px 4px rgba(251, 191, 36, 0.1);
511
+ }
512
+
513
+ .session-readonly-message:hover {
514
+ background: var(--warning-100);
515
+ border-color: var(--warning-300);
516
+ transform: translateY(-1px);
517
+ box-shadow: 0 4px 8px rgba(251, 191, 36, 0.15);
518
+ }
519
+
520
+ .session-readonly-message i {
521
+ flex-shrink: 0;
522
+ margin-right: 6px;
523
+ }
src/pips/static/css/components/utilities.css ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Utility Components */
2
+ .status-bar {
3
+ padding: 12px 24px;
4
+ background: var(--primary-50);
5
+ border-bottom: 1px solid var(--primary-200);
6
+ font-size: 13px;
7
+ font-weight: 500;
8
+ color: var(--primary-700);
9
+ display: none;
10
+ }
11
+
12
+ .status-bar.show {
13
+ display: block;
14
+ }
15
+
16
+ .status-bar.error {
17
+ background: var(--error-50);
18
+ border-color: var(--error-200);
19
+ color: var(--error-700);
20
+ }
21
+
22
+ .status-bar.success {
23
+ background: var(--success-50);
24
+ border-color: var(--success-200);
25
+ color: var(--success-700);
26
+ }
27
+
28
+ .status-bar.warning {
29
+ background: var(--warning-50);
30
+ border-color: var(--warning-200);
31
+ color: var(--warning-700);
32
+ }
33
+
34
+ .progress-container {
35
+ padding: 0 24px 16px;
36
+ display: none;
37
+ }
38
+
39
+ .progress-container.show {
40
+ display: block;
41
+ }
42
+
43
+ .progress-bar {
44
+ width: 100%;
45
+ height: 6px;
46
+ background: var(--gray-200);
47
+ border-radius: var(--radius-sm);
48
+ overflow: hidden;
49
+ }
50
+
51
+ .progress-fill {
52
+ height: 100%;
53
+ background: linear-gradient(90deg, var(--primary-500), var(--primary-600));
54
+ width: 0%;
55
+ transition: width 0.3s ease;
56
+ border-radius: var(--radius-sm);
57
+ }
58
+
59
+ /* Image Upload Components */
60
+ .image-upload {
61
+ position: relative;
62
+ background: white;
63
+ border: 2px dashed var(--gray-300);
64
+ border-radius: 16px;
65
+ padding: 24px;
66
+ text-align: center;
67
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
68
+ cursor: pointer;
69
+ }
70
+
71
+ .image-upload:hover {
72
+ border-color: var(--primary-400);
73
+ background: var(--primary-50);
74
+ transform: translateY(-1px);
75
+ box-shadow: 0 4px 12px rgba(14, 165, 233, 0.15);
76
+ }
77
+
78
+ .image-upload.has-image {
79
+ border-style: solid;
80
+ border-color: var(--success-500);
81
+ background: var(--success-50);
82
+ }
83
+
84
+ .image-upload.drag-over {
85
+ border-color: var(--primary-500);
86
+ background: var(--primary-100);
87
+ transform: scale(1.02);
88
+ box-shadow: 0 8px 24px rgba(14, 165, 233, 0.25);
89
+ }
90
+
91
+ .image-upload-text {
92
+ margin-top: 12px;
93
+ font-size: 14px;
94
+ color: var(--gray-600);
95
+ font-weight: 500;
96
+ }
97
+
98
+ .image-upload.has-image .image-upload-text {
99
+ color: var(--success-700);
100
+ }
101
+
102
+ .image-upload-btn {
103
+ display: inline-flex;
104
+ align-items: center;
105
+ gap: 12px;
106
+ padding: 16px 24px;
107
+ background: linear-gradient(135deg, var(--gray-50) 0%, var(--gray-100) 100%);
108
+ color: var(--gray-700);
109
+ border: 2px solid var(--gray-200);
110
+ border-radius: 12px;
111
+ cursor: pointer;
112
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
113
+ font-size: 14px;
114
+ font-weight: 600;
115
+ letter-spacing: 0.01em;
116
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.04);
117
+ }
118
+
119
+ .image-upload-btn:hover {
120
+ background: linear-gradient(135deg, var(--primary-50) 0%, var(--primary-100) 100%);
121
+ border-color: var(--primary-300);
122
+ color: var(--primary-700);
123
+ transform: translateY(-2px);
124
+ box-shadow: 0 4px 12px rgba(14, 165, 233, 0.2);
125
+ }
126
+
127
+ .image-upload input {
128
+ display: none;
129
+ }
130
+
131
+ .image-preview {
132
+ margin-top: 20px;
133
+ max-width: 100%;
134
+ max-height: 240px;
135
+ border-radius: 12px;
136
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12);
137
+ object-fit: cover;
138
+ border: 3px solid white;
139
+ }
140
+
141
+ /* Expandable message styles */
142
+ .expandable-message {
143
+ position: relative;
144
+ }
145
+
146
+ .expand-toggle {
147
+ display: inline-flex;
148
+ align-items: center;
149
+ gap: 6px;
150
+ padding: 4px 8px;
151
+ background: var(--gray-100);
152
+ border: 1px solid var(--gray-200);
153
+ border-radius: 6px;
154
+ color: var(--gray-600);
155
+ font-size: 12px;
156
+ font-weight: 500;
157
+ cursor: pointer;
158
+ transition: all 0.2s ease;
159
+ margin-left: 8px;
160
+ vertical-align: middle;
161
+ }
162
+
163
+ .expand-toggle:hover {
164
+ background: var(--gray-200);
165
+ border-color: var(--gray-300);
166
+ color: var(--gray-700);
167
+ }
168
+
169
+ .expand-toggle i {
170
+ transition: transform 0.2s ease;
171
+ }
172
+
173
+ .expand-toggle.expanded i {
174
+ transform: rotate(180deg);
175
+ }
176
+
177
+ .expandable-content {
178
+ max-height: 0;
179
+ overflow: hidden;
180
+ transition: max-height 0.3s cubic-bezier(0.4, 0, 0.2, 1);
181
+ margin-top: 12px;
182
+ background: var(--gray-50);
183
+ border-radius: 8px;
184
+ border: 1px solid var(--gray-200);
185
+ }
186
+
187
+ .expandable-content.expanded {
188
+ max-height: 400px;
189
+ overflow-y: auto;
190
+ }
191
+
192
+ /* Custom scrollbar for expandable content */
193
+ .expandable-content::-webkit-scrollbar {
194
+ width: 6px;
195
+ }
196
+
197
+ .expandable-content::-webkit-scrollbar-track {
198
+ background: var(--gray-100);
199
+ border-radius: 3px;
200
+ }
201
+
202
+ .expandable-content::-webkit-scrollbar-thumb {
203
+ background: var(--gray-400);
204
+ border-radius: 3px;
205
+ }
206
+
207
+ .expandable-content::-webkit-scrollbar-thumb:hover {
208
+ background: var(--gray-500);
209
+ }
210
+
211
+ .expandable-content-inner {
212
+ padding: 16px;
213
+ }
214
+
215
+ .prompt-conversation {
216
+ display: flex;
217
+ flex-direction: column;
218
+ gap: 12px;
219
+ }
220
+
221
+ .prompt-message {
222
+ background: white;
223
+ border-radius: 6px;
224
+ padding: 12px;
225
+ border-left: 3px solid var(--gray-300);
226
+ }
227
+
228
+ .prompt-message.system {
229
+ border-left-color: var(--warning-500);
230
+ }
231
+
232
+ .prompt-message.user {
233
+ border-left-color: var(--primary-500);
234
+ }
235
+
236
+ .prompt-message.assistant {
237
+ border-left-color: var(--success-500);
238
+ }
239
+
240
+ .prompt-role {
241
+ font-size: 11px;
242
+ font-weight: 600;
243
+ text-transform: uppercase;
244
+ color: var(--gray-500);
245
+ margin-bottom: 6px;
246
+ letter-spacing: 0.5px;
247
+ }
248
+
249
+ .prompt-message.system .prompt-role {
250
+ color: var(--warning-600);
251
+ }
252
+
253
+ .prompt-message.user .prompt-role {
254
+ color: var(--primary-600);
255
+ }
256
+
257
+ .prompt-message.assistant .prompt-role {
258
+ color: var(--success-600);
259
+ }
260
+
261
+ .prompt-content {
262
+ font-size: 13px;
263
+ line-height: 1.4;
264
+ color: var(--gray-700);
265
+ white-space: pre-wrap;
266
+ word-break: break-word;
267
+ }
268
+
269
+ .prompt-content pre {
270
+ background: var(--gray-50);
271
+ border: 1px solid var(--gray-200);
272
+ border-radius: 4px;
273
+ padding: 8px;
274
+ margin: 8px 0;
275
+ font-size: 12px;
276
+ overflow-x: auto;
277
+ }
278
+
279
+ .prompt-content code {
280
+ background: var(--gray-100);
281
+ padding: 2px 4px;
282
+ border-radius: 3px;
283
+ font-size: 12px;
284
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
285
+ }
286
+
287
+ .prompt-content pre code {
288
+ background: transparent;
289
+ padding: 0;
290
+ border-radius: 0;
291
+ }
292
+
293
+ .prompt-description {
294
+ font-size: 12px;
295
+ color: var(--gray-500);
296
+ margin-bottom: 8px;
297
+ font-style: italic;
298
+ }
299
+
300
+ .prompt-image {
301
+ background: var(--gray-100);
302
+ border: 1px dashed var(--gray-300);
303
+ border-radius: 4px;
304
+ padding: 8px 12px;
305
+ font-size: 12px;
306
+ color: var(--gray-600);
307
+ text-align: center;
308
+ margin: 4px 0;
309
+ }
310
+
311
+ /* Loading and streaming animations */
312
+ .loading-dots::after {
313
+ content: '';
314
+ animation: dots 1.5s steps(5, end) infinite;
315
+ }
316
+
317
+ @keyframes dots {
318
+ 0%, 20% { color: rgba(0,0,0,0); text-shadow: .25em 0 0 rgba(0,0,0,0), .5em 0 0 rgba(0,0,0,0); }
319
+ 40% { color: var(--gray-500); text-shadow: .25em 0 0 rgba(0,0,0,0), .5em 0 0 rgba(0,0,0,0); }
320
+ 60% { text-shadow: .25em 0 0 var(--gray-500), .5em 0 0 rgba(0,0,0,0); }
321
+ 80%, 100% { text-shadow: .25em 0 0 var(--gray-500), .5em 0 0 var(--gray-500); }
322
+ }
323
+
324
+ /* Streaming and execution indicators */
325
+ .streaming-indicator {
326
+ display: inline-flex;
327
+ align-items: center;
328
+ gap: 8px;
329
+ padding: 8px 12px;
330
+ background: var(--primary-50);
331
+ border: 1px solid var(--primary-200);
332
+ border-radius: var(--radius-md);
333
+ font-size: 13px;
334
+ color: var(--primary-700);
335
+ margin: 8px 0;
336
+ }
337
+
338
+ .execution-spinner {
339
+ display: inline-flex;
340
+ align-items: center;
341
+ gap: 8px;
342
+ padding: 8px 12px;
343
+ background: var(--warning-50);
344
+ border: 1px solid var(--warning-200);
345
+ border-radius: var(--radius-md);
346
+ font-size: 13px;
347
+ color: var(--warning-700);
348
+ margin: 8px 0;
349
+ }
350
+
351
+ .spinner {
352
+ width: 16px;
353
+ height: 16px;
354
+ border: 2px solid transparent;
355
+ border-top: 2px solid currentColor;
356
+ border-radius: 50%;
357
+ animation: spin 0.8s linear infinite;
358
+ }
359
+
360
+ @keyframes spin {
361
+ 0% { transform: rotate(0deg); }
362
+ 100% { transform: rotate(360deg); }
363
+ }
364
+
365
+ .streaming-text {
366
+ background: var(--primary-50);
367
+ padding: 2px 6px;
368
+ border-radius: var(--radius-sm);
369
+ border-left: 3px solid var(--primary-500);
370
+ animation: pulse-blue 1.5s ease-in-out infinite;
371
+ min-height: 20px;
372
+ display: block;
373
+ margin-top: 8px;
374
+ }
375
+
376
+ @keyframes pulse-blue {
377
+ 0%, 100% { background-color: var(--primary-50); }
378
+ 50% { background-color: var(--primary-100); }
379
+ }
380
+
381
+ .typing-indicator {
382
+ display: inline;
383
+ color: var(--primary-600);
384
+ font-weight: bold;
385
+ }
386
+
387
+ .typing-indicator::after {
388
+ content: '|';
389
+ animation: blink 1s infinite;
390
+ }
391
+
392
+ @keyframes blink {
393
+ 0%, 50% { opacity: 1; }
394
+ 51%, 100% { opacity: 0; }
395
+ }
src/pips/static/css/main.css ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* PIPS - Main Stylesheet */
2
+ /* Import design tokens first */
3
+ @import 'tokens.css';
4
+
5
+ /* Import base styles */
6
+ @import 'base.css';
7
+
8
+ /* Import all component stylesheets */
9
+ @import 'components/panels.css';
10
+ @import 'components/forms.css';
11
+ @import 'components/buttons.css';
12
+ @import 'components/chat.css';
13
+ @import 'components/sessions.css';
14
+ @import 'components/modal.css';
15
+ @import 'components/utilities.css';
16
+
17
+ /* Import responsive styles last */
18
+ @import 'components/responsive.css';
src/pips/static/css/tokens.css ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary-50: #f0f9ff;
3
+ --primary-100: #e0f2fe;
4
+ --primary-500: #0ea5e9;
5
+ --primary-600: #0284c7;
6
+ --primary-700: #0369a1;
7
+ --primary-800: #075985;
8
+ --primary-900: #0c4a6e;
9
+ --primary-25: #f7fbff;
10
+
11
+ --gray-25: #fcfcfd;
12
+ --gray-50: #f9fafb;
13
+ --gray-100: #f3f4f6;
14
+ --gray-200: #e5e7eb;
15
+ --gray-300: #d1d5db;
16
+ --gray-400: #9ca3af;
17
+ --gray-500: #6b7280;
18
+ --gray-600: #4b5563;
19
+ --gray-700: #374151;
20
+ --gray-800: #1f2937;
21
+ --gray-900: #111827;
22
+
23
+ --success-25: #f7fef7;
24
+ --success-50: #f0fdf4;
25
+ --success-500: #22c55e;
26
+ --success-600: #16a34a;
27
+
28
+ --error-50: #fef2f2;
29
+ --error-500: #ef4444;
30
+ --error-600: #dc2626;
31
+
32
+ --warning-50: #fffbeb;
33
+ --warning-500: #f59e0b;
34
+ --warning-600: #d97706;
35
+
36
+ --radius-sm: 0.375rem;
37
+ --radius-md: 0.5rem;
38
+ --radius-lg: 0.75rem;
39
+ --radius-xl: 1rem;
40
+
41
+ --shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 0.05);
42
+ --shadow-md: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
43
+ --shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
44
+ --shadow-xl: 0 20px 25px -5px rgb(0 0 0 / 0.1), 0 8px 10px -6px rgb(0 0 0 / 0.1);
45
+ }
src/pips/static/default_sessions/builtin_sessions.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exportDate": "2024-01-15T10:00:00.000Z",
3
+ "sessions": {
4
+ "demo_session_math_001": {
5
+ "id": "demo_session_math_001",
6
+ "title": "Calculate compound interest for savings account",
7
+ "problemText": "I want to calculate the compound interest on $1000 invested for 5 years at 6% annual interest rate, compounded quarterly. Show me the formula and calculate the result.",
8
+ "image": null,
9
+ "createdAt": "2024-01-15T09:00:00.000Z",
10
+ "lastUsed": "2024-01-15T09:15:00.000Z",
11
+ "status": "completed",
12
+ "chatHistory": [
13
+ {
14
+ "sender": "PIPS",
15
+ "content": "I'll help you calculate compound interest. Let me break this down step by step and create a Python function to calculate it.",
16
+ "iteration": "Iteration 1",
17
+ "promptDetails": null,
18
+ "timestamp": "2024-01-15T09:01:00.000Z"
19
+ },
20
+ {
21
+ "sender": "AI Assistant",
22
+ "content": "I'll solve this compound interest problem step by step.\n\n**Compound Interest Formula:**\nA = P(1 + r/n)^(nt)\n\nWhere:\n- A = final amount\n- P = principal ($1000)\n- r = annual interest rate (0.06)\n- n = number of times compounded per year (4 for quarterly)\n- t = time in years (5)\n\nLet me calculate this:\n\n```python\ndef calculate_compound_interest(principal, rate, compounds_per_year, years):\n \"\"\"\n Calculate compound interest\n \"\"\"\n amount = principal * (1 + rate/compounds_per_year) ** (compounds_per_year * years)\n interest_earned = amount - principal\n return amount, interest_earned\n\n# Your values\nprincipal = 1000\nrate = 0.06\ncompounds_per_year = 4 # quarterly\nyears = 5\n\nfinal_amount, interest = calculate_compound_interest(principal, rate, compounds_per_year, years)\n\nprint(f\"Principal: ${principal:,.2f}\")\nprint(f\"Annual Interest Rate: {rate*100}%\")\nprint(f\"Compounding: {compounds_per_year} times per year (quarterly)\")\nprint(f\"Time Period: {years} years\")\nprint(f\"\\nFinal Amount: ${final_amount:,.2f}\")\nprint(f\"Interest Earned: ${interest:,.2f}\")\n```\n\n**Result:**\n- Final Amount: $1,346.86\n- Interest Earned: $346.86\n\nYour $1000 investment will grow to $1,346.86 after 5 years, earning $346.86 in compound interest.",
23
+ "iteration": "Iteration 1",
24
+ "promptDetails": null,
25
+ "timestamp": "2024-01-15T09:02:00.000Z"
26
+ }
27
+ ]
28
+ },
29
+ "demo_session_algorithm_002": {
30
+ "id": "demo_session_algorithm_002",
31
+ "title": "Implement binary search algorithm",
32
+ "problemText": "I need to implement a binary search algorithm in Python. It should work on a sorted list and return the index of the target element, or -1 if not found.",
33
+ "image": null,
34
+ "createdAt": "2024-01-15T09:30:00.000Z",
35
+ "lastUsed": "2024-01-15T09:45:00.000Z",
36
+ "status": "completed",
37
+ "chatHistory": [
38
+ {
39
+ "sender": "PIPS",
40
+ "content": "I'll implement a binary search algorithm for you with proper error handling and testing.",
41
+ "iteration": "Iteration 1",
42
+ "promptDetails": null,
43
+ "timestamp": "2024-01-15T09:31:00.000Z"
44
+ },
45
+ {
46
+ "sender": "AI Assistant",
47
+ "content": "I'll implement a binary search algorithm with both iterative and recursive approaches.\n\n```python\ndef binary_search_iterative(arr, target):\n \"\"\"\n Binary search using iterative approach\n \n Args:\n arr: Sorted list to search in\n target: Element to find\n \n Returns:\n Index of target if found, -1 otherwise\n \"\"\"\n left, right = 0, len(arr) - 1\n \n while left <= right:\n mid = left + (right - left) // 2\n \n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n left = mid + 1\n else:\n right = mid - 1\n \n return -1\n\ndef binary_search_recursive(arr, target, left=0, right=None):\n \"\"\"\n Binary search using recursive approach\n \"\"\"\n if right is None:\n right = len(arr) - 1\n \n if left > right:\n return -1\n \n mid = left + (right - left) // 2\n \n if arr[mid] == target:\n return mid\n elif arr[mid] < target:\n return binary_search_recursive(arr, target, mid + 1, right)\n else:\n return binary_search_recursive(arr, target, left, mid - 1)\n\n# Test the implementation\ntest_array = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]\n\nprint(\"Test Array:\", test_array)\nprint(\"\\nTesting Iterative Binary Search:\")\nfor target in [7, 15, 2, 20]:\n result = binary_search_iterative(test_array, target)\n print(f\"Search for {target}: {'Found at index ' + str(result) if result != -1 else 'Not found'}\")\n\nprint(\"\\nTesting Recursive Binary Search:\")\nfor target in [7, 15, 2, 20]:\n result = binary_search_recursive(test_array, target)\n print(f\"Search for {target}: {'Found at index ' + str(result) if result != -1 else 'Not found'}\")\n```\n\n**Output:**\n```\nTest Array: [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]\n\nTesting Iterative Binary Search:\nSearch for 7: Found at index 3\nSearch for 15: Found at index 7\nSearch for 2: Not found\nSearch for 20: Not found\n```\n\n**Time Complexity:** O(log n)\n**Space Complexity:** O(1) for iterative, O(log n) for recursive",
48
+ "iteration": "Iteration 1",
49
+ "promptDetails": null,
50
+ "timestamp": "2024-01-15T09:32:00.000Z"
51
+ }
52
+ ]
53
+ },
54
+ "demo_session_data_003": {
55
+ "id": "demo_session_data_003",
56
+ "title": "Analyze sales data and create visualization",
57
+ "problemText": "I have sales data in a CSV format with columns: date, product, sales_amount, region. Create a Python script to load this data and generate visualizations showing sales trends by region and top-selling products.",
58
+ "image": null,
59
+ "createdAt": "2024-01-15T10:00:00.000Z",
60
+ "lastUsed": "2024-01-15T10:20:00.000Z",
61
+ "status": "completed",
62
+ "chatHistory": [
63
+ {
64
+ "sender": "PIPS",
65
+ "content": "I'll create a comprehensive data analysis script with visualizations for your sales data.",
66
+ "iteration": "Iteration 1",
67
+ "promptDetails": null,
68
+ "timestamp": "2024-01-15T10:01:00.000Z"
69
+ },
70
+ {
71
+ "sender": "AI Assistant",
72
+ "content": "I'll create a comprehensive sales data analysis script with visualizations.\n\n```python\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom datetime import datetime\nimport numpy as np\n\n# Set style for better-looking plots\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\ndef load_and_analyze_sales_data(csv_file):\n \"\"\"\n Load and analyze sales data from CSV file\n \"\"\"\n # Load the data\n try:\n df = pd.read_csv(csv_file)\n print(f\"Data loaded successfully. Shape: {df.shape}\")\n except FileNotFoundError:\n print(\"Creating sample data for demonstration...\")\n df = create_sample_data()\n \n # Convert date column to datetime\n df['date'] = pd.to_datetime(df['date'])\n \n # Basic data info\n print(\"\\nDataset Info:\")\n print(df.info())\n print(\"\\nFirst 5 rows:\")\n print(df.head())\n \n return df\n\ndef create_sample_data():\n \"\"\"\n Create sample sales data for demonstration\n \"\"\"\n np.random.seed(42)\n dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')\n products = ['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Headphones']\n regions = ['North', 'South', 'East', 'West']\n \n data = []\n for date in dates:\n for _ in range(np.random.randint(5, 15)):\n data.append({\n 'date': date,\n 'product': np.random.choice(products),\n 'sales_amount': np.random.uniform(50, 2000),\n 'region': np.random.choice(regions)\n })\n \n return pd.DataFrame(data)\n\ndef analyze_sales_trends(df):\n \"\"\"\n Generate comprehensive sales analysis and visualizations\n \"\"\"\n # Create figure with subplots\n fig, axes = plt.subplots(2, 2, figsize=(15, 12))\n fig.suptitle('Sales Data Analysis Dashboard', fontsize=16, fontweight='bold')\n \n # 1. Sales trends by region over time\n df['month'] = df['date'].dt.to_period('M')\n monthly_sales = df.groupby(['month', 'region'])['sales_amount'].sum().reset_index()\n monthly_sales['month'] = monthly_sales['month'].astype(str)\n \n for region in df['region'].unique():\n region_data = monthly_sales[monthly_sales['region'] == region]\n axes[0, 0].plot(region_data['month'], region_data['sales_amount'], \n marker='o', label=region, linewidth=2)\n \n axes[0, 0].set_title('Monthly Sales Trends by Region')\n axes[0, 0].set_xlabel('Month')\n axes[0, 0].set_ylabel('Sales Amount ($)')\n axes[0, 0].legend()\n axes[0, 0].tick_params(axis='x', rotation=45)\n \n # 2. Top-selling products\n product_sales = df.groupby('product')['sales_amount'].sum().sort_values(ascending=False)\n \n axes[0, 1].bar(product_sales.index, product_sales.values, color='skyblue')\n axes[0, 1].set_title('Total Sales by Product')\n axes[0, 1].set_xlabel('Product')\n axes[0, 1].set_ylabel('Total Sales ($)')\n axes[0, 1].tick_params(axis='x', rotation=45)\n \n # 3. Sales distribution by region (pie chart)\n region_sales = df.groupby('region')['sales_amount'].sum()\n \n axes[1, 0].pie(region_sales.values, labels=region_sales.index, autopct='%1.1f%%')\n axes[1, 0].set_title('Sales Distribution by Region')\n \n # 4. Daily sales volume\n daily_sales = df.groupby('date')['sales_amount'].sum()\n \n axes[1, 1].plot(daily_sales.index, daily_sales.values, alpha=0.7, color='green')\n axes[1, 1].set_title('Daily Sales Volume')\n axes[1, 1].set_xlabel('Date')\n axes[1, 1].set_ylabel('Sales Amount ($)')\n \n plt.tight_layout()\n plt.show()\n \n # Print summary statistics\n print(\"\\n\" + \"=\"*50)\n print(\"SALES ANALYSIS SUMMARY\")\n print(\"=\"*50)\n \n print(f\"\\nTotal Sales: ${df['sales_amount'].sum():,.2f}\")\n print(f\"Average Daily Sales: ${df.groupby('date')['sales_amount'].sum().mean():,.2f}\")\n print(f\"Number of Transactions: {len(df):,}\")\n \n print(\"\\nTop 3 Products by Sales:\")\n for i, (product, sales) in enumerate(product_sales.head(3).items(), 1):\n print(f\"{i}. {product}: ${sales:,.2f}\")\n \n print(\"\\nSales by Region:\")\n for region, sales in region_sales.items():\n percentage = (sales / region_sales.sum()) * 100\n print(f\"{region}: ${sales:,.2f} ({percentage:.1f}%)\")\n\n# Main execution\nif __name__ == \"__main__\":\n # Load and analyze the data\n df = load_and_analyze_sales_data('sales_data.csv')\n \n # Generate analysis and visualizations\n analyze_sales_trends(df)\n \n print(\"\\nAnalysis complete! Check the generated visualizations.\")\n```\n\nThis script provides:\n1. **Data loading** with sample data generation if CSV not found\n2. **Four key visualizations**:\n - Monthly sales trends by region\n - Top-selling products bar chart\n - Regional sales distribution pie chart\n - Daily sales volume timeline\n3. **Summary statistics** including totals, averages, and rankings\n\n**Required libraries:** `pandas`, `matplotlib`, `seaborn`, `numpy`\n\nTo run: `pip install pandas matplotlib seaborn numpy` then execute the script.",
73
+ "iteration": "Iteration 1",
74
+ "promptDetails": null,
75
+ "timestamp": "2024-01-15T10:02:00.000Z"
76
+ }
77
+ ]
78
+ }
79
+ }
80
+ }
src/pips/static/js/core/logger.js ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Logger utility for PIPS application
3
+ */
4
+ export class Logger {
5
+ static log(message, ...args) {
6
+ console.log(`[DEBUG] ${message}`, ...args);
7
+ }
8
+
9
+ static warn(message, ...args) {
10
+ console.warn(`[DEBUG] ${message}`, ...args);
11
+ }
12
+
13
+ static error(message, ...args) {
14
+ console.error(`[DEBUG] ${message}`, ...args);
15
+ }
16
+
17
+ static debug(context, message, data = null) {
18
+ if (data) {
19
+ console.log(`[DEBUG] ${context}: ${message}`, data);
20
+ } else {
21
+ console.log(`[DEBUG] ${context}: ${message}`);
22
+ }
23
+ }
24
+
25
+ static time(label) {
26
+ console.time(`[DEBUG] ${label}`);
27
+ }
28
+
29
+ static timeEnd(label) {
30
+ console.timeEnd(`[DEBUG] ${label}`);
31
+ }
32
+ }
src/pips/static/js/core/state.js ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Application State Management
3
+ */
4
+ import { Logger } from './logger.js';
5
+
6
+ export class AppState {
7
+ constructor() {
8
+ // Core session state
9
+ this.currentSessionId = null;
10
+ this.isSolving = false;
11
+ this.currentIteration = 0;
12
+ this.maxIterationsCount = 8;
13
+ this.isAwaitingUserFeedback = false;
14
+
15
+ // Session management state
16
+ this.currentSessionData = null;
17
+ this.selectedSessionId = null; // Currently loaded session (null means current/new session)
18
+ this.sessionsExpanded = false;
19
+
20
+ // Streaming and execution tracking
21
+ this.streamingMessages = new Map(); // Track streaming messages by iteration
22
+ this.executionSpinners = new Map(); // Track execution spinners by iteration
23
+
24
+ // Timeout and connection monitoring
25
+ this.solvingTimeoutId = null;
26
+ this.connectionTimeoutId = null;
27
+ this.lastHeartbeat = Date.now();
28
+ this.connectionRetries = 0;
29
+
30
+ // Constants
31
+ this.SOLVING_TIMEOUT_MS = 300000; // 5 minutes timeout for solving
32
+ this.CONNECTION_TIMEOUT_MS = 30000; // 30 seconds timeout for connection issues
33
+ this.HEARTBEAT_INTERVAL_MS = 15000; // Send heartbeat every 15 seconds
34
+ this.MAX_CONNECTION_RETRIES = 3;
35
+
36
+ // Session storage
37
+ this.SESSION_STORAGE_KEY = 'pips_sessions';
38
+
39
+
40
+ // Event listeners for state changes
41
+ this.listeners = {};
42
+ }
43
+
44
+ // Event system for state changes
45
+ on(event, callback) {
46
+ if (!this.listeners[event]) {
47
+ this.listeners[event] = [];
48
+ }
49
+ this.listeners[event].push(callback);
50
+ }
51
+
52
+ emit(event, data) {
53
+ if (this.listeners[event]) {
54
+ this.listeners[event].forEach(callback => callback(data));
55
+ }
56
+ }
57
+
58
+ // Session management
59
+ setCurrentSession(sessionData) {
60
+ this.currentSessionData = sessionData;
61
+ this.emit('sessionChanged', sessionData?.id);
62
+ Logger.debug('State', 'Current session updated', sessionData?.id);
63
+ }
64
+
65
+ setSelectedSession(sessionId) {
66
+ this.selectedSessionId = sessionId;
67
+ this.emit('selectedSessionChanged', sessionId);
68
+ Logger.debug('State', 'Selected session changed', sessionId);
69
+ }
70
+
71
+ // Solving state
72
+ setSolving(solving) {
73
+ this.isSolving = solving;
74
+ this.emit('solvingStateChanged', solving);
75
+ Logger.debug('State', `Solving state: ${solving}`);
76
+ }
77
+
78
+ setIteration(iteration) {
79
+ this.currentIteration = iteration;
80
+ this.emit('iterationChanged', iteration);
81
+ }
82
+
83
+ // User feedback state (for interactive mode)
84
+ setUserFeedback(awaiting) {
85
+ this.isAwaitingUserFeedback = awaiting;
86
+ this.emit('userFeedbackStateChanged', awaiting);
87
+ Logger.debug('State', `User feedback state: ${awaiting}`);
88
+ }
89
+
90
+ // Connection state
91
+ setConnectionRetries(retries) {
92
+ this.connectionRetries = retries;
93
+ this.emit('connectionRetriesChanged', retries);
94
+ }
95
+
96
+ updateLastHeartbeat() {
97
+ this.lastHeartbeat = Date.now();
98
+ }
99
+
100
+ // Timeout management
101
+ setSolvingTimeout(timeoutId) {
102
+ this.clearSolvingTimeout();
103
+ this.solvingTimeoutId = timeoutId;
104
+ }
105
+
106
+ clearSolvingTimeout() {
107
+ if (this.solvingTimeoutId) {
108
+ clearTimeout(this.solvingTimeoutId);
109
+ this.solvingTimeoutId = null;
110
+ }
111
+ }
112
+
113
+ setConnectionTimeout(timeoutId) {
114
+ this.clearConnectionTimeout();
115
+ this.connectionTimeoutId = timeoutId;
116
+ }
117
+
118
+ clearConnectionTimeout() {
119
+ if (this.connectionTimeoutId) {
120
+ clearTimeout(this.connectionTimeoutId);
121
+ this.connectionTimeoutId = null;
122
+ }
123
+ }
124
+
125
+ // Streaming management
126
+ addStreamingMessage(id, element) {
127
+ this.streamingMessages.set(id, element);
128
+ }
129
+
130
+ removeStreamingMessage(id) {
131
+ this.streamingMessages.delete(id);
132
+ }
133
+
134
+ addExecutionSpinner(id, element) {
135
+ this.executionSpinners.set(id, element);
136
+ }
137
+
138
+ removeExecutionSpinner(id) {
139
+ this.executionSpinners.delete(id);
140
+ }
141
+
142
+ // Get current state snapshot
143
+ getSnapshot() {
144
+ return {
145
+ currentSessionId: this.currentSessionId,
146
+ isSolving: this.isSolving,
147
+ currentIteration: this.currentIteration,
148
+ selectedSessionId: this.selectedSessionId,
149
+ connectionRetries: this.connectionRetries,
150
+ lastHeartbeat: this.lastHeartbeat,
151
+ streamingMessagesCount: this.streamingMessages.size,
152
+ executionSpinnersCount: this.executionSpinners.size
153
+ };
154
+ }
155
+
156
+ // Reset state (for new session)
157
+ reset() {
158
+ this.currentSessionId = null;
159
+ this.isSolving = false;
160
+ this.currentIteration = 0;
161
+ this.isAwaitingUserFeedback = false;
162
+ this.currentSessionData = null;
163
+ this.selectedSessionId = null;
164
+ this.clearSolvingTimeout();
165
+ this.clearConnectionTimeout();
166
+ this.streamingMessages.clear();
167
+ this.executionSpinners.clear();
168
+ this.emit('stateReset');
169
+ Logger.debug('State', 'Application state reset');
170
+ }
171
+ }
172
+
173
+ // Create singleton instance
174
+ export const appState = new AppState();
src/pips/static/js/core/storage.js ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Storage utilities for PIPS application
3
+ */
4
+ import { Logger } from './logger.js';
5
+
6
+ export class StorageManager {
7
+ constructor() {
8
+ this.SESSION_STORAGE_KEY = 'pips_sessions';
9
+ this.API_KEYS_STORAGE_KEY = 'pips_api_keys';
10
+ this.USER_SETTINGS_STORAGE_KEY = 'pips_user_settings';
11
+ this.DEFAULT_SESSIONS_KEY = 'pips_default_session_ids';
12
+ }
13
+
14
+ // Session storage management
15
+ loadSessions() {
16
+ try {
17
+ const stored = localStorage.getItem(this.SESSION_STORAGE_KEY);
18
+ let sessions = stored ? JSON.parse(stored) : {};
19
+
20
+ // MIGRATION: Older versions stored sessions as an array. Convert to
21
+ // an object keyed by session.id so the rest of the app can work
22
+ // uniformly.
23
+ if (Array.isArray(sessions)) {
24
+ const converted = {};
25
+ sessions.forEach((sess) => {
26
+ if (sess && sess.id) {
27
+ converted[sess.id] = sess;
28
+ }
29
+ });
30
+
31
+ // Persist the converted structure back to localStorage so we
32
+ // do this migration only once.
33
+ localStorage.setItem(this.SESSION_STORAGE_KEY, JSON.stringify(converted));
34
+ sessions = converted;
35
+
36
+ Logger.debug('Storage', `Migrated legacy array-based sessions to object with ${Object.keys(converted).length} entries`);
37
+ }
38
+
39
+ Logger.debug('Storage', `Loaded ${Object.keys(sessions).length} sessions from localStorage`);
40
+ return sessions;
41
+ } catch (e) {
42
+ Logger.error('Storage', 'Error loading sessions from localStorage:', e);
43
+ return {};
44
+ }
45
+ }
46
+
47
+ saveSessions(sessions) {
48
+ // Sanity check: if an array was passed in by mistake, convert it to
49
+ // object form immediately so we never persist the wrong structure.
50
+ if (Array.isArray(sessions)) {
51
+ const obj = {};
52
+ sessions.forEach((sess) => {
53
+ if (sess && sess.id) {
54
+ obj[sess.id] = sess;
55
+ }
56
+ });
57
+ sessions = obj;
58
+ Logger.warn('Storage', 'saveSessions received array – converted to object before persisting');
59
+ }
60
+
61
+ try {
62
+ localStorage.setItem(this.SESSION_STORAGE_KEY, JSON.stringify(sessions));
63
+ Logger.debug('Storage', `Saved ${Object.keys(sessions).length} sessions to localStorage`);
64
+ } catch (e) {
65
+ Logger.error('Storage', 'Error saving sessions to localStorage:', e);
66
+ }
67
+ }
68
+
69
+ saveSession(sessionId, sessionData) {
70
+ const sessions = this.loadSessions();
71
+ sessions[sessionId] = sessionData;
72
+ this.saveSessions(sessions);
73
+ }
74
+
75
+ deleteSession(sessionId) {
76
+ const sessions = this.loadSessions();
77
+ delete sessions[sessionId];
78
+ this.saveSessions(sessions);
79
+ Logger.debug('Storage', `Deleted session ${sessionId}`);
80
+ }
81
+
82
+ clearAllSessions() {
83
+ localStorage.removeItem(this.SESSION_STORAGE_KEY);
84
+ Logger.debug('Storage', 'Cleared all sessions from localStorage');
85
+ }
86
+
87
+ // API keys storage
88
+ loadApiKeys() {
89
+ try {
90
+ const saved = localStorage.getItem(this.API_KEYS_STORAGE_KEY);
91
+ if (saved) {
92
+ const apiKeys = JSON.parse(saved);
93
+ Logger.debug('Storage', 'Loaded API keys from localStorage');
94
+ return apiKeys;
95
+ }
96
+ return {};
97
+ } catch (e) {
98
+ Logger.warn('Storage', 'Could not load API keys from localStorage:', e);
99
+ return {};
100
+ }
101
+ }
102
+
103
+ saveApiKeys(apiKeys) {
104
+ try {
105
+ localStorage.setItem(this.API_KEYS_STORAGE_KEY, JSON.stringify(apiKeys));
106
+ Logger.debug('Storage', 'Saved API keys to localStorage');
107
+ } catch (e) {
108
+ Logger.warn('Storage', 'Could not save API keys to localStorage:', e);
109
+ }
110
+ }
111
+
112
+ // User settings storage
113
+ loadUserSettings() {
114
+ try {
115
+ const saved = localStorage.getItem(this.USER_SETTINGS_STORAGE_KEY);
116
+ if (saved) {
117
+ const settings = JSON.parse(saved);
118
+ Logger.debug('Storage', 'Loaded user settings from localStorage');
119
+ return settings;
120
+ }
121
+ return {};
122
+ } catch (e) {
123
+ Logger.warn('Storage', 'Could not load user settings from localStorage:', e);
124
+ return {};
125
+ }
126
+ }
127
+
128
+ saveUserSettings(settings) {
129
+ try {
130
+ localStorage.setItem(this.USER_SETTINGS_STORAGE_KEY, JSON.stringify(settings));
131
+ Logger.debug('Storage', 'Saved user settings to localStorage');
132
+ } catch (e) {
133
+ Logger.warn('Storage', 'Could not save user settings to localStorage:', e);
134
+ }
135
+ }
136
+
137
+ // Export sessions for backup
138
+ exportSessions() {
139
+ const sessions = this.loadSessions();
140
+ const defaultSessionIds = this.getDefaultSessionIds();
141
+
142
+ // Filter out default sessions
143
+ const userSessions = {};
144
+ Object.entries(sessions).forEach(([sessionId, sessionData]) => {
145
+ if (!defaultSessionIds.includes(sessionId)) {
146
+ userSessions[sessionId] = sessionData;
147
+ }
148
+ });
149
+
150
+ const exportData = {
151
+ exportDate: new Date().toISOString(),
152
+ sessions: userSessions
153
+ };
154
+
155
+ const sessionCount = Object.keys(userSessions).length;
156
+ const filename = sessionCount > 0 ?
157
+ `pips_sessions_${new Date().toISOString().split('T')[0]}.json` :
158
+ `pips_sessions_empty_${new Date().toISOString().split('T')[0]}.json`;
159
+
160
+ const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' });
161
+ const url = URL.createObjectURL(blob);
162
+ const a = document.createElement('a');
163
+ a.href = url;
164
+ a.download = filename;
165
+ document.body.appendChild(a);
166
+ a.click();
167
+ document.body.removeChild(a);
168
+ URL.revokeObjectURL(url);
169
+
170
+ Logger.debug('Storage', `Exported ${sessionCount} user sessions (excluding ${defaultSessionIds.length} default sessions)`);
171
+ return true;
172
+ }
173
+
174
+ // Export single session
175
+ exportSingleSession(sessionId) {
176
+ try {
177
+ const sessions = this.loadSessions();
178
+ const session = sessions[sessionId];
179
+
180
+ if (!session) {
181
+ Logger.error('Storage', `Session ${sessionId} not found for export`);
182
+ return false;
183
+ }
184
+
185
+ const exportData = {
186
+ exportDate: new Date().toISOString(),
187
+ sessions: {
188
+ [sessionId]: session
189
+ }
190
+ };
191
+
192
+ const blob = new Blob([JSON.stringify(exportData, null, 2)], { type: 'application/json' });
193
+ const url = URL.createObjectURL(blob);
194
+ const a = document.createElement('a');
195
+ a.href = url;
196
+ a.download = `pips_session_${session.title.replace(/[^a-z0-9]/gi, '_').toLowerCase()}_${new Date().toISOString().split('T')[0]}.json`;
197
+ document.body.appendChild(a);
198
+ a.click();
199
+ document.body.removeChild(a);
200
+ URL.revokeObjectURL(url);
201
+
202
+ Logger.debug('Storage', `Single session ${sessionId} exported successfully`);
203
+ return true;
204
+ } catch (e) {
205
+ Logger.error('Storage', 'Error exporting single session:', e);
206
+ return false;
207
+ }
208
+ }
209
+
210
+ // Import sessions from JSON data
211
+ importSessions(rawJson, options = {}) {
212
+ const { merge = true, overwriteDuplicates = false } = options;
213
+
214
+ try {
215
+ Logger.debug('Storage', 'Starting session import...');
216
+
217
+ // Parse and validate JSON
218
+ const importData = typeof rawJson === 'string' ? JSON.parse(rawJson) : rawJson;
219
+
220
+ if (!importData || typeof importData !== 'object') {
221
+ throw new Error('Invalid import data: not an object');
222
+ }
223
+
224
+ if (!importData.sessions || typeof importData.sessions !== 'object') {
225
+ throw new Error('Invalid import data: missing or invalid sessions object');
226
+ }
227
+
228
+ const incomingSessions = importData.sessions;
229
+ const incomingIds = Object.keys(incomingSessions);
230
+
231
+ Logger.debug('Storage', `Found ${incomingIds.length} sessions to import`);
232
+
233
+ // Load existing sessions if merging
234
+ let existingSessions = merge ? this.loadSessions() : {};
235
+ let importedCount = 0;
236
+ let skippedCount = 0;
237
+ let duplicatesFound = [];
238
+
239
+ // Process each incoming session
240
+ for (const sessionId of incomingIds) {
241
+ const session = incomingSessions[sessionId];
242
+
243
+ // Validate session structure
244
+ if (!session || !session.id || !session.title) {
245
+ Logger.warn('Storage', `Skipping invalid session: ${sessionId}`);
246
+ skippedCount++;
247
+ continue;
248
+ }
249
+
250
+ // Handle duplicates
251
+ if (existingSessions[sessionId]) {
252
+ duplicatesFound.push(sessionId);
253
+
254
+ if (!overwriteDuplicates) {
255
+ Logger.debug('Storage', `Skipping duplicate session: ${sessionId}`);
256
+ skippedCount++;
257
+ continue;
258
+ } else {
259
+ Logger.debug('Storage', `Overwriting duplicate session: ${sessionId}`);
260
+ }
261
+ }
262
+
263
+ // Check for content-based duplicates (same title and problem text)
264
+ const contentDuplicate = Object.values(existingSessions).find(existing =>
265
+ existing.title === session.title &&
266
+ existing.problemText === session.problemText &&
267
+ existing.id !== sessionId
268
+ );
269
+
270
+ if (contentDuplicate && !overwriteDuplicates) {
271
+ Logger.debug('Storage', `Skipping content duplicate: ${sessionId} (matches ${contentDuplicate.id})`);
272
+ skippedCount++;
273
+ continue;
274
+ }
275
+
276
+ // Import the session
277
+ existingSessions[sessionId] = session;
278
+ importedCount++;
279
+ Logger.debug('Storage', `Imported session: ${sessionId} - "${session.title}"`);
280
+ }
281
+
282
+ // Save the updated sessions
283
+ this.saveSessions(existingSessions);
284
+
285
+ const summary = {
286
+ total: incomingIds.length,
287
+ imported: importedCount,
288
+ skipped: skippedCount,
289
+ duplicates: duplicatesFound.length,
290
+ duplicateIds: duplicatesFound
291
+ };
292
+
293
+ Logger.debug('Storage', 'Import completed:', summary);
294
+ return summary;
295
+
296
+ } catch (e) {
297
+ Logger.error('Storage', 'Error importing sessions:', e);
298
+ throw e;
299
+ }
300
+ }
301
+
302
+ // Import sessions from URL
303
+ async importSessionsFromUrl(url, options = {}) {
304
+ try {
305
+ Logger.debug('Storage', `Fetching sessions from URL: ${url}`);
306
+
307
+ const response = await fetch(url);
308
+ if (!response.ok) {
309
+ throw new Error(`HTTP error! status: ${response.status}`);
310
+ }
311
+
312
+ const rawJson = await response.text();
313
+ const result = this.importSessions(rawJson, options);
314
+
315
+ Logger.debug('Storage', `Successfully imported sessions from URL: ${url}`);
316
+ return result;
317
+
318
+ } catch (e) {
319
+ Logger.error('Storage', 'Error importing sessions from URL:', e);
320
+ throw e;
321
+ }
322
+ }
323
+
324
+ // Helper for programmatic exports
325
+ saveSessionBundle(sessionsObj) {
326
+ try {
327
+ const exportData = {
328
+ exportDate: new Date().toISOString(),
329
+ sessions: sessionsObj
330
+ };
331
+
332
+ Logger.debug('Storage', `Created session bundle with ${Object.keys(sessionsObj).length} sessions`);
333
+ return exportData;
334
+ } catch (e) {
335
+ Logger.error('Storage', 'Error creating session bundle:', e);
336
+ throw e;
337
+ }
338
+ }
339
+
340
+ // Utility methods
341
+ isStorageAvailable() {
342
+ try {
343
+ const test = '__storage_test__';
344
+ localStorage.setItem(test, test);
345
+ localStorage.removeItem(test);
346
+ return true;
347
+ } catch (e) {
348
+ Logger.warn('Storage', 'localStorage is not available');
349
+ return false;
350
+ }
351
+ }
352
+
353
+ getStorageUsage() {
354
+ if (!this.isStorageAvailable()) return null;
355
+
356
+ try {
357
+ const sessions = localStorage.getItem(this.SESSION_STORAGE_KEY);
358
+ const apiKeys = localStorage.getItem(this.API_KEYS_STORAGE_KEY);
359
+
360
+ return {
361
+ sessions: sessions ? sessions.length : 0,
362
+ apiKeys: apiKeys ? apiKeys.length : 0,
363
+ total: (sessions?.length || 0) + (apiKeys?.length || 0)
364
+ };
365
+ } catch (e) {
366
+ Logger.warn('Storage', 'Could not calculate storage usage:', e);
367
+ return null;
368
+ }
369
+ }
370
+
371
+ // Default session tracking
372
+ getDefaultSessionIds() {
373
+ try {
374
+ const saved = localStorage.getItem(this.DEFAULT_SESSIONS_KEY);
375
+ if (saved) {
376
+ const ids = JSON.parse(saved);
377
+ Logger.debug('Storage', 'Loaded default session IDs from localStorage');
378
+ return ids;
379
+ }
380
+ return [];
381
+ } catch (e) {
382
+ Logger.warn('Storage', 'Could not load default session IDs from localStorage:', e);
383
+ return [];
384
+ }
385
+ }
386
+
387
+ saveDefaultSessionIds(ids) {
388
+ try {
389
+ localStorage.setItem(this.DEFAULT_SESSIONS_KEY, JSON.stringify(ids));
390
+ Logger.debug('Storage', 'Saved default session IDs to localStorage');
391
+ } catch (e) {
392
+ Logger.warn('Storage', 'Could not save default session IDs to localStorage:', e);
393
+ }
394
+ }
395
+ }
396
+
397
+ // Create singleton instance
398
+ export const storageManager = new StorageManager();
src/pips/static/js/handlers/socket-handlers.js ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Socket Event Handlers - Handles all socket event handling logic
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { appState } from '../core/state.js';
6
+ import { domManager } from '../ui/dom-manager.js';
7
+ import { messageManager } from '../ui/message-manager.js';
8
+ import { settingsManager } from '../ui/settings-manager.js';
9
+ import { sessionManager } from '../ui/session-manager.js';
10
+
11
+ export class SocketEventHandlers {
12
+ constructor() {
13
+ this.timeoutHandlers = {
14
+ solvingTimeoutId: null,
15
+ connectionTimeoutId: null
16
+ };
17
+ }
18
+
19
+ // Get all event handlers for registration with socket manager
20
+ getEventHandlers() {
21
+ return {
22
+ 'session_connected': (data) => this.handleSessionConnected(data),
23
+ 'settings_updated': (data) => this.handleSettingsUpdated(data),
24
+ 'solving_started': (data) => this.handleSolvingStarted(data),
25
+ 'step_update': (data) => this.handleStepUpdate(data),
26
+ 'solving_complete': (data) => this.handleSolvingComplete(data),
27
+ 'solving_interrupted': (data) => this.handleSolvingInterrupted(data),
28
+ 'solving_error': (data) => this.handleSolvingError(data),
29
+ 'ai_response': (data) => this.handleAIResponse(data),
30
+ 'error': (data) => this.handleError(data),
31
+
32
+ // Streaming event handlers - CRITICAL FOR CHAT FUNCTIONALITY
33
+ 'llm_streaming_start': (data) => this.handleLLMStreamingStart(data),
34
+ 'llm_streaming_token': (data) => this.handleLLMStreamingToken(data),
35
+ 'llm_streaming_end': (data) => this.handleLLMStreamingEnd(data),
36
+ 'llm_response': (data) => this.handleLLMResponse(data),
37
+
38
+ // Code execution handlers
39
+ 'code_execution_start': (data) => this.handleCodeExecutionStart(data),
40
+ 'code_execution_end': (data) => this.handleCodeExecutionEnd(data),
41
+ 'code_execution': (data) => this.handleCodeExecution(data),
42
+
43
+ // Code review streaming handlers
44
+ 'code_check_streaming_start': (data) => this.handleCodeCheckStreamingStart(data),
45
+ 'code_check_streaming_token': (data) => this.handleCodeCheckStreamingToken(data),
46
+ 'code_check_streaming_end': (data) => this.handleCodeCheckStreamingEnd(data),
47
+
48
+ // Interactive mode handlers
49
+ 'awaiting_user_feedback': (data) => this.handleAwaitingUserFeedback(data),
50
+ 'final_artifacts': (data) => this.handleFinalArtifacts(data),
51
+
52
+ // Heartbeat handler
53
+ 'heartbeat_response': (data) => this.handleHeartbeatResponse(data)
54
+ };
55
+ }
56
+
57
+ // Custom socket connection handlers
58
+ getConnectionHandlers() {
59
+ return {
60
+ 'connected': () => this.handleSocketConnected(),
61
+ 'disconnected': ({ reason }) => this.handleSocketDisconnected(reason),
62
+ 'connectionError': ({ error }) => this.handleConnectionError(error),
63
+ 'ioError': ({ error }) => this.handleIOError(error)
64
+ };
65
+ }
66
+
67
+ // Socket connection event handlers
68
+ handleSocketConnected() {
69
+ console.log('[DEBUG] handleSocketConnected called');
70
+
71
+ // Only show "Connecting..." if we don't already have a session ID
72
+ if (!appState.currentSessionId) {
73
+ console.log('[DEBUG] No session ID yet, showing Connecting...');
74
+ domManager.updateSessionInfo('Connecting...');
75
+ } else {
76
+ console.log('[DEBUG] Already have session ID:', appState.currentSessionId);
77
+ }
78
+
79
+ // Fallback safety-net: re-request session info if still unknown after 1s
80
+ setTimeout(() => {
81
+ if (!appState.currentSessionId) {
82
+ console.log('[DEBUG] Session ID still unknown after 1s, requesting session info');
83
+ Logger.debug('Socket Event', 'Session ID still unknown after 1s, requesting session info');
84
+ // Import socketManager here to avoid circular dependency
85
+ import('../network/socket.js').then(({ socketManager }) => {
86
+ socketManager.send('request_session_info');
87
+ });
88
+ }
89
+ }, 1000);
90
+ }
91
+
92
+ handleSocketDisconnected(reason) {
93
+ domManager.updateSessionInfo('Session: Not connected');
94
+ domManager.updateStatus('Disconnected from server', 'error');
95
+
96
+ // Reset solving state if we were solving
97
+ if (appState.isSolving) {
98
+ this.resetSolvingState();
99
+ messageManager.addMessage('PIPS System', 'Connection lost during solving. Please try again.', null);
100
+ }
101
+ }
102
+
103
+ handleConnectionError(error) {
104
+ domManager.updateStatus('Connection error. Retrying...', 'error');
105
+ }
106
+
107
+ handleIOError(error) {
108
+ domManager.updateStatus('Socket.IO error occurred', 'error');
109
+ }
110
+
111
+ // Main socket event handlers
112
+ handleSessionConnected(data) {
113
+ console.log('[DEBUG] handleSessionConnected called with data:', data);
114
+ Logger.debug('Socket Event', 'Session connected:', data);
115
+ sessionManager.handleSessionConnected(data);
116
+
117
+ // Load saved API keys and send to server
118
+ settingsManager.initializeServerSettings();
119
+ }
120
+
121
+ handleSettingsUpdated(data) {
122
+ Logger.debug('Socket Event', 'Settings updated:', data);
123
+ settingsManager.handleSettingsUpdated(data);
124
+ }
125
+
126
+ handleSolvingStarted(data) {
127
+ Logger.debug('Socket Event', 'Solving started:', data);
128
+ appState.setSolving(true);
129
+ appState.setIteration(0);
130
+
131
+ domManager.setSolvingState();
132
+ domManager.updateStatus(data.message, 'info');
133
+ this.setSolvingTimeout();
134
+
135
+ // Clear any existing feedback panels from previous sessions
136
+ if (window.interactiveFeedback) {
137
+ window.interactiveFeedback.removeFeedbackPanel();
138
+ window.interactiveFeedback.removeRestoreButton();
139
+ }
140
+
141
+ // Update session management
142
+ sessionManager.handleSolvingStarted();
143
+ }
144
+
145
+ handleStepUpdate(data) {
146
+ Logger.debug('Socket Event', 'Step update:', data);
147
+
148
+ appState.setIteration(data.iteration || 0);
149
+ domManager.updateStatus(data.message, 'info');
150
+ domManager.updateProgress(data.progress);
151
+
152
+ // Show step message with improved messaging
153
+ let displayMessage = data.message;
154
+
155
+ // Improve messaging for specific steps
156
+ if (data.step === 'code_checking') {
157
+ displayMessage = `Analyzing code quality (iteration ${data.iteration})...`;
158
+ } else if (data.step === 'code_refinement') {
159
+ displayMessage = `Refining solution (iteration ${data.iteration})...`;
160
+ } else if (data.step === 'interrupted') {
161
+ displayMessage = '⏹️ PIPS was interrupted by the user.';
162
+ } else if (data.step === 'finished') {
163
+ displayMessage = '🎉 Solution completed successfully!';
164
+ }
165
+
166
+ messageManager.addMessage('PIPS', displayMessage, data.iteration, data.prompt_details);
167
+ this.resetSolvingTimeout();
168
+ }
169
+
170
+ handleSolvingComplete(data) {
171
+ Logger.debug('Socket Event', 'Solving complete:', data);
172
+
173
+ this.clearSolvingTimeout();
174
+ this.resetSolvingState();
175
+
176
+ // Clean up any interactive feedback UI
177
+ if (window.interactiveFeedback) {
178
+ window.interactiveFeedback.removeFeedbackPanel();
179
+ window.interactiveFeedback.removeRestoreButton();
180
+ }
181
+
182
+ // Display final answer
183
+ if (data.final_answer) {
184
+ messageManager.displayFinalAnswer(data.final_answer);
185
+ }
186
+
187
+ domManager.updateStatus('Problem solving completed successfully!', 'success');
188
+
189
+ // Update session management
190
+ sessionManager.handleSolvingComplete();
191
+ }
192
+
193
+ handleSolvingInterrupted(data) {
194
+ Logger.debug('Socket Event', 'Solving interrupted:', data);
195
+
196
+ this.clearSolvingTimeout();
197
+ this.resetSolvingState();
198
+
199
+ // Clean up any interactive feedback UI
200
+ if (window.interactiveFeedback) {
201
+ window.interactiveFeedback.removeFeedbackPanel();
202
+ window.interactiveFeedback.removeRestoreButton();
203
+ }
204
+
205
+ domManager.updateStatus(data.message || 'Problem solving interrupted', 'warning');
206
+
207
+ // Update session management
208
+ sessionManager.handleSolvingInterrupted();
209
+ }
210
+
211
+ handleSolvingError(data) {
212
+ Logger.error('Socket Event', 'Solving error:', data);
213
+ domManager.updateStatus(`Error: ${data.error}`, 'error');
214
+
215
+ this.clearSolvingTimeout();
216
+ this.resetSolvingState();
217
+
218
+ // Clean up any interactive feedback UI
219
+ if (window.interactiveFeedback) {
220
+ window.interactiveFeedback.removeFeedbackPanel();
221
+ window.interactiveFeedback.removeRestoreButton();
222
+ }
223
+
224
+ messageManager.addMessage('PIPS System', `Error: ${data.error}`, null);
225
+
226
+ // Update session management - clean up and save session
227
+ sessionManager.handleSolvingError();
228
+ }
229
+
230
+ handleAIResponse(data) {
231
+ Logger.debug('Socket Event', 'AI response:', data);
232
+ messageManager.addMessage(data.sender || 'AI Assistant', data.content, data.iteration);
233
+ domManager.updateProgress(data.progress);
234
+ }
235
+
236
+ handleError(data) {
237
+ Logger.error('Socket Event', 'Socket error:', data);
238
+ domManager.updateStatus(`Error: ${data.message}`, 'error');
239
+
240
+ if (appState.isSolving) {
241
+ this.clearSolvingTimeout();
242
+ this.resetSolvingState();
243
+
244
+ // Clean up any interactive feedback UI
245
+ if (window.interactiveFeedback) {
246
+ window.interactiveFeedback.removeFeedbackPanel();
247
+ window.interactiveFeedback.removeRestoreButton();
248
+ }
249
+
250
+ // Clean up session state and save messages when socket errors occur
251
+ sessionManager.handleSolvingError();
252
+ }
253
+ }
254
+
255
+ // STREAMING EVENT HANDLERS - CRITICAL FOR CHAT FUNCTIONALITY
256
+ handleLLMStreamingStart(data) {
257
+ Logger.debug('Socket Event', 'LLM streaming started:', data);
258
+ messageManager.showAIThinkingIndicator(data.iteration, 'AI Assistant', data.model_name);
259
+ }
260
+
261
+ handleLLMStreamingToken(data) {
262
+ Logger.debug('Socket Event', 'LLM streaming token received:', data.token);
263
+ messageManager.updateStreamingMessage(data.token, data.iteration, 'AI Assistant', data.model_name);
264
+ }
265
+
266
+ handleLLMStreamingEnd(data) {
267
+ Logger.debug('Socket Event', 'LLM streaming ended:', data);
268
+ messageManager.removeAIThinkingIndicator(data.iteration, 'AI Assistant');
269
+ messageManager.finalizeStreamingMessage(data.iteration, 'AI Assistant');
270
+ }
271
+
272
+ handleLLMResponse(data) {
273
+ Logger.debug('Socket Event', 'LLM response (fallback):', data);
274
+ // Fallback for non-streaming responses
275
+ messageManager.removeAIThinkingIndicator(data.iteration);
276
+ messageManager.addMessage('AI Assistant', data.response, data.iteration);
277
+ }
278
+
279
+ // CODE EXECUTION HANDLERS
280
+ handleCodeExecutionStart(data) {
281
+ Logger.debug('Socket Event', 'Code execution started:', data);
282
+ messageManager.showExecutionSpinner(data.iteration);
283
+ }
284
+
285
+ handleCodeExecutionEnd(data) {
286
+ Logger.debug('Socket Event', 'Code execution ended:', data);
287
+ messageManager.removeExecutionSpinner(data.iteration);
288
+ }
289
+
290
+ handleCodeExecution(data) {
291
+ Logger.debug('Socket Event', 'Code execution result:', data);
292
+ messageManager.removeExecutionSpinner(data.iteration);
293
+
294
+ let resultText = '';
295
+ if (data.error && data.error.trim() !== '') {
296
+ resultText = `Error: ${data.error}`;
297
+ messageManager.displayExecutionResult(resultText, data.iteration, true);
298
+ } else {
299
+ if (data.stdout && data.stdout.trim() !== '') {
300
+ resultText += `Output: ${data.stdout}\n`;
301
+ }
302
+ if (data.output && data.output.trim() !== '' && data.output !== 'None') {
303
+ resultText += `Result: ${data.output}`;
304
+ }
305
+ if (resultText.trim() === '') {
306
+ resultText = 'Code executed successfully (no output)';
307
+ }
308
+ messageManager.displayExecutionResult(resultText, data.iteration, false);
309
+ }
310
+ }
311
+
312
+ // CODE REVIEW STREAMING HANDLERS
313
+ handleCodeCheckStreamingStart(data) {
314
+ Logger.debug('Socket Event', 'Code reviewer streaming started:', data);
315
+ messageManager.showAIThinkingIndicator(data.iteration, 'AI Code Reviewer', data.model_name);
316
+ }
317
+
318
+ handleCodeCheckStreamingToken(data) {
319
+ Logger.debug('Socket Event', 'Code reviewer streaming token received:', data.token);
320
+ messageManager.updateStreamingMessage(data.token, data.iteration, 'AI Code Reviewer', data.model_name);
321
+ }
322
+
323
+ handleCodeCheckStreamingEnd(data) {
324
+ Logger.debug('Socket Event', 'Code reviewer streaming ended:', data);
325
+ messageManager.removeAIThinkingIndicator(data.iteration, 'AI Code Reviewer');
326
+ messageManager.finalizeStreamingMessage(data.iteration, 'AI Code Reviewer');
327
+ }
328
+
329
+ // Interactive mode handlers
330
+ handleAwaitingUserFeedback(data) {
331
+ Logger.debug('Socket Event', 'Awaiting user feedback:', data);
332
+
333
+ // Pause solving state to allow user interaction
334
+ appState.setUserFeedback(true);
335
+ domManager.updateStatus('Waiting for your feedback...', 'info');
336
+
337
+ // Show the interactive feedback panel
338
+ if (window.interactiveFeedback) {
339
+ window.interactiveFeedback.showFeedbackPanel(data);
340
+ } else {
341
+ // Fallback: show basic feedback interface
342
+ this.showBasicFeedbackInterface(data);
343
+ }
344
+ }
345
+
346
+ handleFinalArtifacts(data) {
347
+ Logger.debug('Socket Event', 'Final artifacts:', data);
348
+
349
+ // Show final artifacts in the UI
350
+ if (window.interactiveFeedback) {
351
+ window.interactiveFeedback.showFinalArtifacts(data);
352
+ } else {
353
+ // Fallback: show in message
354
+ messageManager.addMessage('PIPS System', 'Final solution artifacts are ready.', null);
355
+ }
356
+ }
357
+
358
+ showBasicFeedbackInterface(data) {
359
+ // Basic feedback interface if the interactive-feedback module isn't available
360
+ const feedbackHtml = `
361
+ <div class="basic-feedback-panel">
362
+ <h4>Interactive Feedback Required</h4>
363
+ <p>AI Critic: ${data.critic_text || 'No critic feedback available'}</p>
364
+ <div class="feedback-buttons">
365
+ <button onclick="window.provideFeedback(true, '')">Accept & Continue</button>
366
+ <button onclick="window.provideFeedback(false, '')">Reject & Continue</button>
367
+ <button onclick="window.terminateSession()">Finish Here</button>
368
+ </div>
369
+ </div>
370
+ `;
371
+
372
+ // Add to chat
373
+ messageManager.addMessage('PIPS Interactive', feedbackHtml, data.iteration || null);
374
+
375
+ // Set up global feedback functions
376
+ window.provideFeedback = (acceptCritic, comments) => {
377
+ import('../network/socket.js').then(({ socketManager }) => {
378
+ socketManager.send('provide_feedback', {
379
+ accept_critic: acceptCritic,
380
+ extra_comments: comments,
381
+ quoted_ranges: [],
382
+ terminate: false
383
+ });
384
+ });
385
+ };
386
+
387
+ window.terminateSession = () => {
388
+ import('../network/socket.js').then(({ socketManager }) => {
389
+ socketManager.send('provide_feedback', {
390
+ accept_critic: true,
391
+ extra_comments: '',
392
+ quoted_ranges: [],
393
+ terminate: true
394
+ });
395
+ });
396
+ };
397
+ }
398
+
399
+ // HEARTBEAT HANDLER
400
+ handleHeartbeatResponse(data) {
401
+ Logger.debug('Socket Event', 'Heartbeat response received');
402
+ appState.updateLastHeartbeat();
403
+ }
404
+
405
+ // Timeout management methods
406
+ setSolvingTimeout() {
407
+ appState.setSolvingTimeout(setTimeout(() => {
408
+ Logger.error('SocketHandlers', 'Solving timeout detected - server may be unresponsive');
409
+ domManager.updateStatus('Server timeout detected. The server may be unresponsive. Try refreshing the page.', 'error');
410
+ this.resetSolvingState();
411
+
412
+ // Clean up any interactive feedback UI
413
+ if (window.interactiveFeedback) {
414
+ window.interactiveFeedback.removeFeedbackPanel();
415
+ window.interactiveFeedback.removeRestoreButton();
416
+ }
417
+
418
+ messageManager.addMessage('PIPS System', 'Operation timed out. The server may be experiencing issues. Please try again or contact support if the problem persists.', null);
419
+
420
+ // Clean up session state and save messages when timeout occurs
421
+ sessionManager.handleSolvingError();
422
+ }, appState.SOLVING_TIMEOUT_MS));
423
+ }
424
+
425
+ clearSolvingTimeout() {
426
+ appState.clearSolvingTimeout();
427
+ }
428
+
429
+ resetSolvingTimeout() {
430
+ this.clearSolvingTimeout();
431
+ this.setSolvingTimeout();
432
+ }
433
+
434
+ resetSolvingState() {
435
+ appState.setSolving(false);
436
+ appState.setIteration(0);
437
+ domManager.resetSolvingState();
438
+ }
439
+ }
440
+
441
+ // Create singleton instance
442
+ export const socketEventHandlers = new SocketEventHandlers();
src/pips/static/js/main.js ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * PIPS - Main Application Bootstrap
3
+ * Initializes the modular PIPS application
4
+ */
5
+
6
+ // Import core modules
7
+ import { Logger } from './core/logger.js';
8
+ import { appState } from './core/state.js';
9
+ import { socketManager } from './network/socket.js';
10
+ import { storageManager } from './core/storage.js';
11
+
12
+ // Import UI modules
13
+ import { domManager } from './ui/dom-manager.js';
14
+ import { messageManager } from './ui/message-manager.js';
15
+ import { settingsManager } from './ui/settings-manager.js';
16
+ import { sessionManager } from './ui/session-manager.js';
17
+ import { imageHandler } from './ui/image-handler.js';
18
+
19
+ // Import handlers
20
+ import { socketEventHandlers } from './handlers/socket-handlers.js';
21
+
22
+ // Global error handlers
23
+ window.addEventListener('error', (event) => {
24
+ Logger.error('Global JavaScript error:', event.error);
25
+ Logger.error('Error message:', event.message);
26
+ Logger.error('Error filename:', event.filename);
27
+ Logger.error('Error line:', event.lineno);
28
+ Logger.error('Error column:', event.colno);
29
+ });
30
+
31
+ window.addEventListener('unhandledrejection', (event) => {
32
+ Logger.error('Unhandled promise rejection:', event.reason);
33
+ });
34
+
35
+ /**
36
+ * Main Application class - Coordinates all modules
37
+ */
38
+ class PIPSApplication {
39
+ constructor() {
40
+ this.isInitialized = false;
41
+ }
42
+
43
+ async initialize() {
44
+ if (this.isInitialized) {
45
+ Logger.warn('App', 'Application already initialized');
46
+ return;
47
+ }
48
+
49
+ try {
50
+ Logger.log('App', 'Initializing PIPS application...');
51
+
52
+ // Initialize managers
53
+ domManager.setupDOMReferences();
54
+ await domManager.initializeIcons();
55
+
56
+ // Set up event handlers
57
+ socketManager.initialize();
58
+ sessionManager.initialize();
59
+ settingsManager.initialize();
60
+ imageHandler.initialize();
61
+
62
+ // Load user settings from storage
63
+ settingsManager.loadUserSettingsFromStorage();
64
+
65
+ // Perform first-run seeding of default sessions
66
+ await this.performFirstRunSeeding();
67
+
68
+ // Set up additional image features
69
+ imageHandler.setupPasteHandler();
70
+
71
+ // Set up core functionality event listeners
72
+ this.setupCoreEventListeners();
73
+
74
+ // Register *all* socket & connection handlers BEFORE connecting
75
+ this.setupSocketHandlers();
76
+
77
+ // Connect socket *after* handlers are registered
78
+ await socketManager.initialize();
79
+
80
+ // Set initial status
81
+ domManager.updateStatus('Connecting to PIPS server...', 'info');
82
+
83
+ this.isInitialized = true;
84
+ Logger.log('App', 'PIPS application initialized successfully');
85
+
86
+ } catch (error) {
87
+ Logger.error('App', 'Error during initialization:', error);
88
+ domManager.updateStatus('Failed to initialize application', 'error');
89
+ }
90
+ }
91
+
92
+ async performFirstRunSeeding() {
93
+ try {
94
+ // Check if this is the first run
95
+ if (!localStorage.getItem('pips_first_run_completed')) {
96
+ Logger.debug('App', 'First run detected, seeding default sessions...');
97
+
98
+ try {
99
+ const result = await storageManager.importSessionsFromUrl('/static/default_sessions/builtin_sessions.json');
100
+ Logger.log('App', `Seeded ${result.imported} default sessions successfully`);
101
+
102
+ // Track which sessions are defaults by storing their IDs
103
+ if (result.imported > 0) {
104
+ const sessions = storageManager.loadSessions();
105
+ const defaultSessionIds = Object.keys(sessions);
106
+ storageManager.saveDefaultSessionIds(defaultSessionIds);
107
+ Logger.debug('App', `Tracked ${defaultSessionIds.length} default session IDs`);
108
+ }
109
+
110
+ // Mark first run as completed
111
+ localStorage.setItem('pips_first_run_completed', 'yes');
112
+ localStorage.setItem('pips_default_sessions_loaded', new Date().toISOString());
113
+
114
+ } catch (error) {
115
+ Logger.warn('App', 'Could not load default sessions (this is normal in development):', error.message);
116
+ // Still mark as completed to avoid repeated attempts
117
+ localStorage.setItem('pips_first_run_completed', 'yes');
118
+ }
119
+ } else {
120
+ Logger.debug('App', 'Not first run, skipping default session seeding');
121
+ }
122
+ } catch (error) {
123
+ Logger.error('App', 'Error during first-run seeding:', error);
124
+ }
125
+ }
126
+
127
+ setupCoreEventListeners() {
128
+ // Core problem solving functionality
129
+ domManager.getElement('solveBtn')?.addEventListener('click', () => this.solveProblem());
130
+ domManager.getElement('interruptBtn')?.addEventListener('click', () => this.interruptSolving());
131
+ domManager.getElement('downloadBtn')?.addEventListener('click', () => messageManager.downloadChat());
132
+
133
+ Logger.debug('App', 'Core event listeners set up');
134
+
135
+ // Set up emergency cleanup handler for page unload
136
+ window.addEventListener('beforeunload', () => {
137
+ Logger.debug('App', 'Page unloading - performing emergency cleanup');
138
+ sessionManager.emergencyCleanupAndSave();
139
+ });
140
+
141
+
142
+ }
143
+
144
+ setupSocketHandlers() {
145
+ console.log('[DEBUG] Setting up socket handlers...');
146
+
147
+ // Register all socket event handlers (these are real Socket.IO events)
148
+ const eventHandlers = socketEventHandlers.getEventHandlers();
149
+ console.log('[DEBUG] Event handlers to register:', Object.keys(eventHandlers));
150
+ socketManager.registerEventHandlers(eventHandlers);
151
+
152
+ // Register connection handlers (these are internal socketManager events)
153
+ const connectionHandlers = socketEventHandlers.getConnectionHandlers();
154
+ console.log('[DEBUG] Connection handlers to register:', Object.keys(connectionHandlers));
155
+ Object.entries(connectionHandlers).forEach(([event, handler]) => {
156
+ socketManager.on(event, handler);
157
+ });
158
+
159
+ Logger.debug('App', 'Socket event handlers set up successfully');
160
+ }
161
+
162
+ // Core functionality methods
163
+ solveProblem() {
164
+ const questionInput = domManager.getElement('questionInput');
165
+ const text = questionInput?.value.trim();
166
+
167
+ if (!text) {
168
+ domManager.updateStatus('Please enter a problem description', 'warning');
169
+ return;
170
+ }
171
+
172
+ // Check if the current session is used and should be read-only
173
+ if (appState.currentSessionData && sessionManager.isSessionUsed(appState.currentSessionData)) {
174
+ domManager.updateStatus('This session has been used. Please start a new session to solve another problem.', 'warning');
175
+ // Automatically start a new session
176
+ sessionManager.startNewSession();
177
+ return;
178
+ }
179
+
180
+ // Get image data if available
181
+ const imageData = imageHandler.getImageForSubmission();
182
+
183
+ // Handle session creation/management through session manager
184
+ const sessionId = sessionManager.handleSolveProblem(text, imageData);
185
+
186
+ // Send current settings to server first to ensure PIPS mode is included
187
+ settingsManager.sendCurrentSettingsToServer();
188
+
189
+ // Send problem to server
190
+ socketManager.send('solve_problem', {
191
+ text: text,
192
+ image: imageData,
193
+ session_id: sessionId
194
+ });
195
+
196
+ Logger.debug('App', 'Problem submitted for solving');
197
+ }
198
+
199
+ interruptSolving() {
200
+ Logger.debug('App', 'Interrupt button clicked');
201
+ socketManager.send('interrupt_solving');
202
+ domManager.updateStatus('Interrupting current task...', 'warning');
203
+ }
204
+
205
+ // Global method for message expansion (called from HTML)
206
+ toggleExpandMessage(button) {
207
+ messageManager.toggleExpandMessage(button);
208
+ }
209
+
210
+ // Global methods for session management (called from HTML)
211
+ get sessionManager() {
212
+ return sessionManager;
213
+ }
214
+
215
+ // Expose modules for debugging and external access
216
+ getModules() {
217
+ return {
218
+ domManager,
219
+ messageManager,
220
+ settingsManager,
221
+ sessionManager,
222
+ imageHandler,
223
+ socketEventHandlers,
224
+ appState,
225
+ socketManager,
226
+ storageManager
227
+ };
228
+ }
229
+ }
230
+
231
+ // Initialize application when DOM is ready
232
+ document.addEventListener('DOMContentLoaded', async () => {
233
+ Logger.log('DOM content loaded');
234
+
235
+ try {
236
+ const app = new PIPSApplication();
237
+ await app.initialize();
238
+
239
+ // Store app instance globally for debugging and HTML callbacks
240
+ window.pipsApp = app;
241
+
242
+ // Also expose key functions globally for HTML access
243
+ window.toggleExpandMessage = (button) => app.toggleExpandMessage(button);
244
+
245
+
246
+
247
+ } catch (error) {
248
+ Logger.error('Failed to initialize PIPS application:', error);
249
+ }
250
+ });
src/pips/static/js/network/socket.js ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Socket.IO connection and event management
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { appState } from '../core/state.js';
6
+
7
+ export class SocketManager {
8
+ constructor() {
9
+ this.socket = null;
10
+ this.eventHandlers = new Map();
11
+ this.isInitialized = false;
12
+ }
13
+
14
+ async initialize() {
15
+ if (this.isInitialized) {
16
+ Logger.warn('Socket', 'Already initialized');
17
+ return this.socket;
18
+ }
19
+
20
+ try {
21
+ Logger.debug('Socket', 'Initializing Socket.IO connection...');
22
+
23
+ this.socket = io({
24
+ transports: ['websocket', 'polling'],
25
+ timeout: 20000,
26
+ forceNew: true,
27
+ upgrade: true,
28
+ rememberUpgrade: true,
29
+ autoConnect: true,
30
+ reconnection: true,
31
+ reconnectionDelay: 1000,
32
+ reconnectionAttempts: 5,
33
+ maxHttpBufferSize: 1e6,
34
+ pingTimeout: 60000,
35
+ pingInterval: 25000
36
+ });
37
+
38
+ this.setupConnectionHandlers();
39
+
40
+ // Register any cached handlers after socket creation
41
+ this.registerCachedEventHandlers();
42
+
43
+ this.isInitialized = true;
44
+
45
+ Logger.debug('Socket', 'Socket.IO initialized successfully');
46
+ return this.socket;
47
+ } catch (e) {
48
+ Logger.error('Socket', 'Error initializing Socket.IO:', e);
49
+ throw e;
50
+ }
51
+ }
52
+
53
+ setupConnectionHandlers() {
54
+ this.socket.on('connect', () => {
55
+ Logger.debug('Socket', 'Socket connected successfully');
56
+ Logger.debug('Socket', 'Socket ID:', this.socket.id);
57
+ Logger.debug('Socket', 'Socket connected:', this.socket.connected);
58
+ Logger.debug('Socket', 'Socket transport:', this.socket.io.engine.transport.name);
59
+
60
+ // Clear timeouts and reset connection state on successful connect
61
+ appState.clearConnectionTimeout();
62
+ appState.setConnectionRetries(0);
63
+ appState.updateLastHeartbeat();
64
+
65
+ this.emit('connected', {
66
+ socketId: this.socket.id,
67
+ transport: this.socket.io.engine.transport.name
68
+ });
69
+ });
70
+
71
+ this.socket.on('disconnect', (reason) => {
72
+ Logger.debug('Socket', 'Socket disconnected');
73
+ Logger.debug('Socket', 'Disconnect reason:', reason);
74
+ Logger.debug('Socket', 'Socket connected:', this.socket.connected);
75
+
76
+ // Clear all timeouts and reset state on disconnect
77
+ appState.clearSolvingTimeout();
78
+ appState.clearConnectionTimeout();
79
+ appState.currentSessionId = null;
80
+
81
+ this.emit('disconnected', { reason });
82
+ });
83
+
84
+ this.socket.on('connect_error', (error) => {
85
+ Logger.error('Socket', 'Socket connection error:', error);
86
+ Logger.error('Socket', 'Error details:', error.message);
87
+
88
+ this.emit('connectionError', { error });
89
+ });
90
+
91
+ this.socket.io.on('error', (error) => {
92
+ Logger.error('Socket', 'Socket.IO error:', error);
93
+ this.emit('ioError', { error });
94
+ });
95
+ }
96
+
97
+ // Event subscription system
98
+ on(event, handler) {
99
+ if (!this.eventHandlers.has(event)) {
100
+ this.eventHandlers.set(event, []);
101
+ }
102
+ this.eventHandlers.get(event).push(handler);
103
+
104
+ Logger.debug('Socket', `Handler stored for event: ${event}`);
105
+ }
106
+
107
+ // Emit custom events (not socket events)
108
+ emit(event, data) {
109
+ if (this.eventHandlers.has(event)) {
110
+ this.eventHandlers.get(event).forEach(handler => {
111
+ try {
112
+ handler(data);
113
+ } catch (e) {
114
+ Logger.error('Socket', `Error in event handler for ${event}:`, e);
115
+ }
116
+ });
117
+ }
118
+ }
119
+
120
+ // Send data to server
121
+ send(event, data) {
122
+ if (!this.socket || !this.socket.connected) {
123
+ Logger.error('Socket', 'Cannot send - socket not connected');
124
+ return false;
125
+ }
126
+
127
+ try {
128
+ this.socket.emit(event, data);
129
+ Logger.debug('Socket', `Sent event: ${event}`, data);
130
+ return true;
131
+ } catch (e) {
132
+ Logger.error('Socket', `Error sending event ${event}:`, e);
133
+ return false;
134
+ }
135
+ }
136
+
137
+ // Connection utilities
138
+ isConnected() {
139
+ return this.socket && this.socket.connected;
140
+ }
141
+
142
+ getSocketId() {
143
+ return this.socket?.id || null;
144
+ }
145
+
146
+ disconnect() {
147
+ if (this.socket) {
148
+ this.socket.disconnect();
149
+ Logger.debug('Socket', 'Socket disconnected manually');
150
+ }
151
+ }
152
+
153
+ reconnect() {
154
+ if (this.socket) {
155
+ this.socket.disconnect();
156
+ setTimeout(() => {
157
+ this.socket.connect();
158
+ Logger.debug('Socket', 'Attempting manual reconnection');
159
+ }, 1000);
160
+ }
161
+ }
162
+
163
+ // Register all event handlers from the original monolithic code
164
+ registerEventHandlers(handlers) {
165
+ Object.entries(handlers).forEach(([event, handler]) => {
166
+ // Store in internal system for tracking
167
+ if (!this.eventHandlers.has(event)) {
168
+ this.eventHandlers.set(event, []);
169
+ }
170
+ this.eventHandlers.get(event).push(handler);
171
+
172
+ // Register directly with socket if it exists
173
+ if (this.socket) {
174
+ this.socket.on(event, handler);
175
+ Logger.debug('Socket', `Registered handler for event: ${event}`);
176
+ } else {
177
+ Logger.debug('Socket', `Cached handler for event: ${event} (socket not ready)`);
178
+ }
179
+ });
180
+ }
181
+
182
+ // Helper method to register all cached handlers after socket creation
183
+ registerCachedEventHandlers() {
184
+ console.log('[DEBUG] registerCachedEventHandlers called, handlers map:', this.eventHandlers);
185
+ this.eventHandlers.forEach((handlers, event) => {
186
+ handlers.forEach(handler => {
187
+ if (this.socket) {
188
+ this.socket.on(event, handler);
189
+ console.log(`[DEBUG] Registered cached handler for event: ${event}`);
190
+ Logger.debug('Socket', `Registered cached handler for event: ${event}`);
191
+ }
192
+ });
193
+ });
194
+ }
195
+ }
196
+
197
+ // Create singleton instance
198
+ export const socketManager = new SocketManager();
src/pips/static/js/ui/dom-manager.js ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * DOM Manager - Handles DOM references, basic UI operations, and status management
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+
6
+ export class DOMManager {
7
+ constructor() {
8
+ this.elements = {};
9
+ }
10
+
11
+ setupDOMReferences() {
12
+ // Cache all DOM elements
13
+ this.elements = {
14
+ // Input elements
15
+ questionInput: document.getElementById('questionInput'),
16
+ imageInput: document.getElementById('imageInput'),
17
+ imagePreview: document.getElementById('imagePreview'),
18
+
19
+ // Button elements
20
+ solveBtn: document.getElementById('solveBtn'),
21
+ interruptBtn: document.getElementById('interruptBtn'),
22
+ settingsBtn: document.getElementById('settingsBtn'),
23
+ downloadBtn: document.getElementById('downloadBtn'),
24
+ newSessionBtn: document.getElementById('newSessionBtn'),
25
+
26
+ // UI elements
27
+ chatArea: document.getElementById('chatArea'),
28
+ statusIndicator: document.getElementById('statusIndicator'),
29
+ progressContainer: document.getElementById('progressContainer'),
30
+ progressFill: document.getElementById('progressFill'),
31
+ sessionInfo: document.getElementById('sessionInfo'),
32
+
33
+ // Settings modal elements
34
+ settingsModal: document.getElementById('settingsModal'),
35
+ settingsForm: document.getElementById('settingsForm'),
36
+ closeBtn: document.querySelector('.close'),
37
+
38
+ // Form elements
39
+ modelSelect: document.getElementById('modelSelect'),
40
+ generatorModelSelect: document.getElementById('generatorModelSelect'),
41
+ criticModelSelect: document.getElementById('criticModelSelect'),
42
+ openaiApiKeyInput: document.getElementById('openaiApiKeyInput'),
43
+ googleApiKeyInput: document.getElementById('googleApiKeyInput'),
44
+ anthropicApiKeyInput: document.getElementById('anthropicApiKeyInput'),
45
+ maxIterations: document.getElementById('maxIterations'),
46
+ temperature: document.getElementById('temperature'),
47
+ maxTokens: document.getElementById('maxTokens'),
48
+ maxExecutionTime: document.getElementById('maxExecutionTime'),
49
+ pipsModeSwitch: document.getElementById('pipsModeSwitch'),
50
+ pipsModeAgent: document.getElementById('pipsModeAgent'),
51
+ pipsModeInteractive: document.getElementById('pipsModeInteractive'),
52
+ modeDescription: document.getElementById('modeDescription'),
53
+ customRules: document.getElementById('customRules'),
54
+ customRulesSettings: document.getElementById('customRulesSettings'),
55
+
56
+ // Session elements
57
+ sessionsToggle: document.getElementById('sessionsToggle'),
58
+ sessionsContainer: document.getElementById('sessionsContainer'),
59
+ sessionsList: document.getElementById('sessionsList'),
60
+ clearSessionsBtn: document.getElementById('clearSessionsBtn'),
61
+ exportSessionsBtn: document.getElementById('exportSessionsBtn'),
62
+ importSessionsBtn: document.getElementById('importSessionsBtn'),
63
+ importSessionsInput: document.getElementById('importSessionsInput'),
64
+
65
+ // Upload elements
66
+ imageUpload: document.querySelector('.image-upload'),
67
+ imageUploadBtn: document.querySelector('.image-upload-btn')
68
+ };
69
+
70
+ // Verify critical elements exist
71
+ const criticalElements = [
72
+ 'questionInput', 'solveBtn', 'chatArea', 'statusIndicator'
73
+ ];
74
+
75
+ for (const elementName of criticalElements) {
76
+ if (!this.elements[elementName]) {
77
+ Logger.error(`Critical element missing: ${elementName}`);
78
+ }
79
+ }
80
+
81
+ Logger.debug('DOM', 'DOM references set up successfully');
82
+ }
83
+
84
+ async initializeIcons() {
85
+ try {
86
+ if (typeof feather !== 'undefined') {
87
+ feather.replace();
88
+ Logger.log('Feather icons initialized successfully');
89
+ } else {
90
+ Logger.warn('Feather icons library not found');
91
+ }
92
+ } catch (e) {
93
+ Logger.error('Error initializing Feather icons:', e);
94
+ }
95
+ }
96
+
97
+ updateStatus(message, type = 'info') {
98
+ this.elements.statusIndicator.textContent = message;
99
+ this.elements.statusIndicator.className = `status-bar show ${type}`;
100
+
101
+ // Auto-hide status after 5 seconds unless it's an error
102
+ if (type !== 'error') {
103
+ setTimeout(() => {
104
+ this.elements.statusIndicator.classList.remove('show');
105
+ }, 5000);
106
+ }
107
+ }
108
+
109
+ updateSessionInfo(text) {
110
+ this.elements.sessionInfo.textContent = text;
111
+ }
112
+
113
+ resetSolvingState() {
114
+ this.elements.solveBtn.style.display = 'inline-flex';
115
+ this.elements.interruptBtn.style.display = 'none';
116
+ this.elements.questionInput.disabled = false;
117
+ this.elements.progressContainer.classList.remove('show');
118
+ this.elements.progressFill.style.width = '0%';
119
+ }
120
+
121
+ setSolvingState() {
122
+ this.elements.solveBtn.style.display = 'none';
123
+ this.elements.interruptBtn.style.display = 'inline-flex';
124
+ this.elements.questionInput.disabled = true;
125
+ this.elements.progressContainer.classList.add('show');
126
+ }
127
+
128
+ updateProgress(progress) {
129
+ if (progress !== undefined) {
130
+ this.elements.progressFill.style.width = `${progress}%`;
131
+ }
132
+ }
133
+
134
+ clearInputs() {
135
+ this.elements.questionInput.value = '';
136
+ this.elements.imagePreview.style.display = 'none';
137
+ }
138
+
139
+ getElement(name) {
140
+ return this.elements[name];
141
+ }
142
+
143
+ getAllElements() {
144
+ return this.elements;
145
+ }
146
+ }
147
+
148
+ // Create singleton instance
149
+ export const domManager = new DOMManager();
src/pips/static/js/ui/image-handler.js ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Image Handler - Handles image upload and drag & drop functionality
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { domManager } from './dom-manager.js';
6
+
7
+ export class ImageHandler {
8
+ constructor() {
9
+ this.isInitialized = false;
10
+ this.currentImageData = null;
11
+ }
12
+
13
+ initialize() {
14
+ if (this.isInitialized) return;
15
+
16
+ this.setupEventListeners();
17
+ this.setupDragAndDrop();
18
+ this.isInitialized = true;
19
+
20
+ Logger.debug('Image', 'Image handler initialized');
21
+ }
22
+
23
+ setupEventListeners() {
24
+ // Image upload listeners
25
+ domManager.getElement('imageInput')?.addEventListener('change', (e) => this.handleImageUpload(e));
26
+ domManager.getElement('imageUploadBtn')?.addEventListener('click', () => this.triggerImageUpload());
27
+
28
+ Logger.debug('Image', 'Event listeners set up');
29
+ }
30
+
31
+ setupDragAndDrop() {
32
+ const imageUpload = domManager.getElement('imageUpload');
33
+
34
+ if (!imageUpload) {
35
+ Logger.warn('Image', 'Image upload element not found');
36
+ return;
37
+ }
38
+
39
+ imageUpload.addEventListener('dragover', (e) => {
40
+ e.preventDefault();
41
+ imageUpload.classList.add('drag-over');
42
+ });
43
+
44
+ imageUpload.addEventListener('dragleave', () => {
45
+ imageUpload.classList.remove('drag-over');
46
+ });
47
+
48
+ imageUpload.addEventListener('drop', (e) => {
49
+ e.preventDefault();
50
+ imageUpload.classList.remove('drag-over');
51
+
52
+ const file = e.dataTransfer.files[0];
53
+ if (file && file.type.startsWith('image/')) {
54
+ this.processImageFile(file);
55
+ } else {
56
+ domManager.updateStatus('Please drop a valid image file', 'warning');
57
+ }
58
+ });
59
+
60
+ Logger.debug('Image', 'Drag and drop set up');
61
+ }
62
+
63
+ triggerImageUpload() {
64
+ const imageInput = domManager.getElement('imageInput');
65
+ if (imageInput) {
66
+ imageInput.click();
67
+ }
68
+ }
69
+
70
+ handleImageUpload(e) {
71
+ const file = e.target.files[0];
72
+ if (file) {
73
+ this.processImageFile(file);
74
+ }
75
+ }
76
+
77
+ processImageFile(file) {
78
+ // Validate file type
79
+ if (!file.type.startsWith('image/')) {
80
+ domManager.updateStatus('Please select a valid image file', 'warning');
81
+ return;
82
+ }
83
+
84
+ // Validate file size (10MB limit)
85
+ const maxSize = 10 * 1024 * 1024; // 10MB
86
+ if (file.size > maxSize) {
87
+ domManager.updateStatus('Image file is too large. Please select a file under 10MB', 'warning');
88
+ return;
89
+ }
90
+
91
+ const reader = new FileReader();
92
+
93
+ reader.onload = (e) => {
94
+ try {
95
+ this.displayImagePreview(e.target.result);
96
+ this.currentImageData = e.target.result;
97
+ domManager.updateStatus(`Image "${file.name}" loaded successfully`, 'success');
98
+ Logger.debug('Image', `Image processed: ${file.name} (${file.size} bytes)`);
99
+ } catch (error) {
100
+ Logger.error('Image', 'Error processing image:', error);
101
+ domManager.updateStatus('Error processing image', 'error');
102
+ }
103
+ };
104
+
105
+ reader.onerror = () => {
106
+ Logger.error('Image', 'Error reading image file');
107
+ domManager.updateStatus('Error reading image file', 'error');
108
+ };
109
+
110
+ reader.readAsDataURL(file);
111
+ }
112
+
113
+ displayImagePreview(imageSrc) {
114
+ const imagePreview = domManager.getElement('imagePreview');
115
+ const imageUpload = domManager.getElement('imageUpload');
116
+ const imageUploadBtn = domManager.getElement('imageUploadBtn');
117
+
118
+ if (imagePreview) {
119
+ imagePreview.src = imageSrc;
120
+ imagePreview.style.display = 'block';
121
+ }
122
+
123
+ if (imageUpload) {
124
+ imageUpload.classList.add('has-image');
125
+ }
126
+
127
+ if (imageUploadBtn) {
128
+ imageUploadBtn.innerHTML = `
129
+ <i data-feather="check-circle" style="width: 16px; height: 16px;"></i>
130
+ Image Selected
131
+ `;
132
+
133
+ // Replace feather icons
134
+ if (typeof feather !== 'undefined') {
135
+ feather.replace(imageUploadBtn);
136
+ }
137
+ }
138
+ }
139
+
140
+ clearImage() {
141
+ const imagePreview = domManager.getElement('imagePreview');
142
+ const imageUpload = domManager.getElement('imageUpload');
143
+ const imageUploadBtn = domManager.getElement('imageUploadBtn');
144
+ const imageInput = domManager.getElement('imageInput');
145
+
146
+ if (imagePreview) {
147
+ imagePreview.style.display = 'none';
148
+ imagePreview.src = '';
149
+ }
150
+
151
+ if (imageUpload) {
152
+ imageUpload.classList.remove('has-image');
153
+ }
154
+
155
+ if (imageUploadBtn) {
156
+ imageUploadBtn.innerHTML = `
157
+ <i data-feather="upload" style="width: 16px; height: 16px;"></i>
158
+ Upload Image
159
+ `;
160
+
161
+ // Replace feather icons
162
+ if (typeof feather !== 'undefined') {
163
+ feather.replace(imageUploadBtn);
164
+ }
165
+ }
166
+
167
+ if (imageInput) {
168
+ imageInput.value = '';
169
+ }
170
+
171
+ this.currentImageData = null;
172
+ Logger.debug('Image', 'Image cleared');
173
+ }
174
+
175
+ getCurrentImageData() {
176
+ return this.currentImageData;
177
+ }
178
+
179
+ hasImage() {
180
+ return this.currentImageData !== null;
181
+ }
182
+
183
+ // Get image data in format suitable for sending to server
184
+ getImageForSubmission() {
185
+ if (!this.currentImageData) {
186
+ return null;
187
+ }
188
+
189
+ try {
190
+ // Extract base64 data without the data URL prefix
191
+ const base64Data = this.currentImageData.split(',')[1];
192
+ const mimeType = this.currentImageData.split(';')[0].split(':')[1];
193
+
194
+ return {
195
+ data: base64Data,
196
+ mimeType: mimeType,
197
+ filename: `uploaded_image.${this.getExtensionFromMimeType(mimeType)}`
198
+ };
199
+ } catch (error) {
200
+ Logger.error('Image', 'Error preparing image for submission:', error);
201
+ return null;
202
+ }
203
+ }
204
+
205
+ getExtensionFromMimeType(mimeType) {
206
+ const extensions = {
207
+ 'image/jpeg': 'jpg',
208
+ 'image/jpg': 'jpg',
209
+ 'image/png': 'png',
210
+ 'image/gif': 'gif',
211
+ 'image/webp': 'webp',
212
+ 'image/bmp': 'bmp',
213
+ 'image/svg+xml': 'svg'
214
+ };
215
+
216
+ return extensions[mimeType] || 'jpg';
217
+ }
218
+
219
+ // Validate image before processing
220
+ validateImage(file) {
221
+ const validTypes = ['image/jpeg', 'image/jpg', 'image/png', 'image/gif', 'image/webp', 'image/bmp'];
222
+ const maxSize = 10 * 1024 * 1024; // 10MB
223
+
224
+ const errors = [];
225
+
226
+ if (!validTypes.includes(file.type)) {
227
+ errors.push('Invalid file type. Please select a JPEG, PNG, GIF, WebP, or BMP image.');
228
+ }
229
+
230
+ if (file.size > maxSize) {
231
+ errors.push('File size too large. Please select an image under 10MB.');
232
+ }
233
+
234
+ if (file.size === 0) {
235
+ errors.push('File appears to be empty.');
236
+ }
237
+
238
+ return {
239
+ isValid: errors.length === 0,
240
+ errors: errors
241
+ };
242
+ }
243
+
244
+ // Get image metadata
245
+ getImageMetadata(file) {
246
+ return {
247
+ name: file.name,
248
+ size: file.size,
249
+ type: file.type,
250
+ lastModified: file.lastModified ? new Date(file.lastModified) : null
251
+ };
252
+ }
253
+
254
+ // Handle paste events for image upload
255
+ setupPasteHandler() {
256
+ document.addEventListener('paste', (e) => {
257
+ const items = e.clipboardData?.items;
258
+ if (!items) return;
259
+
260
+ for (let i = 0; i < items.length; i++) {
261
+ const item = items[i];
262
+
263
+ if (item.type.startsWith('image/')) {
264
+ e.preventDefault();
265
+ const file = item.getAsFile();
266
+
267
+ if (file) {
268
+ this.processImageFile(file);
269
+ domManager.updateStatus('Image pasted from clipboard', 'success');
270
+ }
271
+ break;
272
+ }
273
+ }
274
+ });
275
+
276
+ Logger.debug('Image', 'Paste handler set up');
277
+ }
278
+
279
+ // Generate image thumbnail for preview
280
+ generateThumbnail(imageSrc, maxWidth = 200, maxHeight = 200) {
281
+ return new Promise((resolve, reject) => {
282
+ const img = new Image();
283
+ const canvas = document.createElement('canvas');
284
+ const ctx = canvas.getContext('2d');
285
+
286
+ img.onload = () => {
287
+ // Calculate new dimensions
288
+ let { width, height } = img;
289
+
290
+ if (width > height) {
291
+ if (width > maxWidth) {
292
+ height = (height * maxWidth) / width;
293
+ width = maxWidth;
294
+ }
295
+ } else {
296
+ if (height > maxHeight) {
297
+ width = (width * maxHeight) / height;
298
+ height = maxHeight;
299
+ }
300
+ }
301
+
302
+ canvas.width = width;
303
+ canvas.height = height;
304
+
305
+ // Draw resized image
306
+ ctx.drawImage(img, 0, 0, width, height);
307
+
308
+ // Convert to data URL
309
+ const thumbnailData = canvas.toDataURL('image/jpeg', 0.8);
310
+ resolve(thumbnailData);
311
+ };
312
+
313
+ img.onerror = () => {
314
+ reject(new Error('Failed to load image for thumbnail generation'));
315
+ };
316
+
317
+ img.src = imageSrc;
318
+ });
319
+ }
320
+
321
+ // SESSION MANAGEMENT METHODS
322
+ loadSessionImage(imageData) {
323
+ const imagePreview = domManager.getElement('imagePreview');
324
+ const imageUpload = document.querySelector('.image-upload');
325
+ const uploadBtn = document.querySelector('.image-upload-btn');
326
+
327
+ if (imageData && imagePreview && imageUpload && uploadBtn) {
328
+ // Load image into preview
329
+ imagePreview.src = imageData;
330
+ imagePreview.style.display = 'block';
331
+ imageUpload.classList.add('has-image');
332
+
333
+ // Update button state
334
+ uploadBtn.innerHTML = `
335
+ <i data-feather="check-circle" style="width: 16px; height: 16px;"></i>
336
+ Image Selected
337
+ `;
338
+
339
+ // Store image data
340
+ this.currentImageData = imageData;
341
+
342
+ Logger.debug('Image', 'Session image loaded');
343
+ } else {
344
+ // Clear image if no data provided
345
+ this.clearImage();
346
+ }
347
+
348
+ // Replace feather icons
349
+ try {
350
+ if (typeof feather !== 'undefined' && uploadBtn) {
351
+ feather.replace(uploadBtn);
352
+ }
353
+ } catch (e) {
354
+ Logger.warn('Image', 'Could not replace feather icons in upload button:', e);
355
+ }
356
+ }
357
+ }
358
+
359
+ // Create singleton instance
360
+ export const imageHandler = new ImageHandler();
src/pips/static/js/ui/interactive-feedback.js ADDED
@@ -0,0 +1,955 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Interactive Feedback UI for PIPS Interactive Mode
3
+ *
4
+ * This module handles the user interface for providing feedback on
5
+ * AI-generated code and critic suggestions during interactive solving.
6
+ */
7
+
8
+ class InteractiveFeedback {
9
+ constructor() {
10
+ this.feedbackPanel = null;
11
+ this.currentIteration = null;
12
+ this.currentCode = '';
13
+ this.currentSymbols = {};
14
+ this.criticText = '';
15
+ this.selectedRanges = [];
16
+ this.isVisible = false;
17
+ this.isResizing = false;
18
+ this.sidebarWidth = 380; // Default width
19
+ this.minWidth = 300;
20
+ this.maxWidth = 800;
21
+ this.feedbackCounter = 0;
22
+ this.isMinimized = false;
23
+ this.restoreButton = null;
24
+
25
+ // Store panel state for restoration
26
+ this.panelState = null;
27
+
28
+ this.initializeEventHandlers();
29
+ }
30
+
31
+ initializeEventHandlers() {
32
+ // Socket event handlers - Note: We don't handle these here anymore
33
+ // They are handled by the main socket event handlers in socket-handlers.js
34
+ // This class is called by those handlers when needed
35
+
36
+ // Add global mouse events for resizing
37
+ document.addEventListener('mousemove', (e) => this.handleMouseMove(e));
38
+ document.addEventListener('mouseup', () => this.handleMouseUp());
39
+ }
40
+
41
+ showFeedbackPanel(data) {
42
+ const { iteration, critic_text, code, symbols } = data;
43
+
44
+ this.currentIteration = iteration;
45
+ this.currentCode = code;
46
+ this.currentSymbols = symbols;
47
+ this.criticText = critic_text;
48
+ this.selectedRanges = [];
49
+ this.feedbackCounter = 0;
50
+ this.isMinimized = false;
51
+
52
+ // Store panel state for potential restoration
53
+ this.panelState = {
54
+ iteration,
55
+ critic_text,
56
+ code,
57
+ symbols
58
+ };
59
+
60
+ // Remove any existing restore button
61
+ this.removeRestoreButton();
62
+
63
+ this.renderFeedbackPanel();
64
+ }
65
+
66
+ renderFeedbackPanel() {
67
+ // Remove existing panel if any
68
+ this.removeFeedbackPanel();
69
+
70
+ // Create compact sidebar panel
71
+ this.feedbackPanel = document.createElement('div');
72
+ this.feedbackPanel.className = 'feedback-sidebar';
73
+ this.feedbackPanel.style.width = `${this.sidebarWidth}px`;
74
+ this.feedbackPanel.innerHTML = `
75
+ <div class="feedback-resize-handle" id="resize-handle"></div>
76
+
77
+ <div class="feedback-sidebar-header">
78
+ <div class="feedback-title">
79
+ <h4>Interactive Review</h4>
80
+ <span class="iteration-badge">Iteration ${this.currentIteration}</span>
81
+ </div>
82
+ <div class="feedback-controls">
83
+ <button class="feedback-close" id="feedback-close" title="Close panel">
84
+ <i data-feather="x"></i>
85
+ </button>
86
+ </div>
87
+ </div>
88
+
89
+ <div class="feedback-sidebar-content">
90
+ <!-- Symbols Section -->
91
+ <div class="symbols-section">
92
+ <div class="section-header">
93
+ <h5>Extracted Symbols</h5>
94
+ <button class="expand-symbols-btn" id="expand-symbols">
95
+ <i data-feather="eye"></i>
96
+ </button>
97
+ </div>
98
+ <div class="symbols-preview" id="symbols-preview">
99
+ ${this.renderSymbolsJSON()}
100
+ </div>
101
+ </div>
102
+
103
+ <!-- Code Preview Section -->
104
+ <div class="code-preview-section">
105
+ <div class="section-header">
106
+ <h5>Generated Code</h5>
107
+ <button class="expand-code-btn" id="expand-code">
108
+ <i data-feather="maximize-2"></i>
109
+ </button>
110
+ </div>
111
+ <div class="code-preview" id="code-preview">
112
+ <pre class="code-snippet hoverable-code" title="Click to expand and highlight code">${this.escapeHtml(this.truncateCode(this.currentCode))}</pre>
113
+ </div>
114
+ </div>
115
+
116
+ <!-- AI Critic Section -->
117
+ <div class="critic-section">
118
+ <div class="section-header">
119
+ <h5>AI Analysis</h5>
120
+ <label class="critic-toggle">
121
+ <input type="checkbox" id="accept-critic" checked>
122
+ <span class="toggle-slider"></span>
123
+ </label>
124
+ </div>
125
+ <div class="critic-summary">
126
+ ${this.formatCriticSummary(this.criticText)}
127
+ </div>
128
+ </div>
129
+
130
+ <!-- Feedback Cart -->
131
+ <div class="feedback-cart" id="feedback-cart">
132
+ <div class="section-header">
133
+ <h5>Your Feedback</h5>
134
+ <span class="cart-count" id="cart-count">0 items</span>
135
+ </div>
136
+ <div class="cart-items" id="cart-items">
137
+ <div class="empty-cart">
138
+ <i data-feather="message-circle"></i>
139
+ <p>No feedback added yet</p>
140
+ <small>Highlight code or symbols to add feedback</small>
141
+ </div>
142
+ </div>
143
+ </div>
144
+
145
+ <!-- Quick Actions -->
146
+ <div class="quick-actions">
147
+ <button class="action-btn secondary" id="add-comment">
148
+ <i data-feather="plus"></i>
149
+ Add General Comment
150
+ </button>
151
+ <button class="action-btn success" id="finish-here">
152
+ <i data-feather="check"></i>
153
+ Submit Feedback
154
+ </button>
155
+ </div>
156
+
157
+ <!-- Comments Section (Initially Hidden) -->
158
+ <div class="comments-section" id="comments-section" style="display: none;">
159
+ <h5>Add General Comment</h5>
160
+ <textarea id="user-comments"
161
+ placeholder="Add your general feedback here..."
162
+ rows="3"></textarea>
163
+ <div class="comment-actions">
164
+ <button class="action-btn small primary" id="save-comment">Add</button>
165
+ <button class="action-btn small secondary" id="cancel-comment">Cancel</button>
166
+ </div>
167
+ </div>
168
+ </div>
169
+
170
+ <!-- Symbols Modal (Hidden by default) -->
171
+ <div class="symbols-modal" id="symbols-modal" style="display: none;">
172
+ <div class="symbols-modal-content">
173
+ <div class="symbols-modal-header">
174
+ <h4>Extracted Symbols - Iteration ${this.currentIteration}</h4>
175
+ <button class="modal-close" id="close-symbols-modal">
176
+ <i data-feather="x"></i>
177
+ </button>
178
+ </div>
179
+ <div class="symbols-modal-body">
180
+ <div class="symbols-container">
181
+ <pre class="symbols-json selectable-json" id="symbols-json">${this.escapeHtml(JSON.stringify(this.currentSymbols, null, 2))}</pre>
182
+ </div>
183
+ <div class="selection-info">
184
+ <p>Select any part of the JSON to add specific feedback</p>
185
+ </div>
186
+
187
+ <!-- Dialogue Box for Symbol Feedback -->
188
+ <div class="dialogue-box" id="symbol-dialogue" style="display: none;">
189
+ <div class="dialogue-header">
190
+ <h6>Add Feedback</h6>
191
+ <button class="dialogue-close" id="close-symbol-dialogue">×</button>
192
+ </div>
193
+ <div class="dialogue-content">
194
+ <div class="highlighted-content">
195
+ <label>Selected:</label>
196
+ <div class="highlight-preview" id="symbol-highlight-preview"></div>
197
+ </div>
198
+ <div class="feedback-input">
199
+ <label>Your feedback:</label>
200
+ <textarea id="symbol-feedback-text" placeholder="Enter your feedback about this selection..." rows="3"></textarea>
201
+ </div>
202
+ <div class="dialogue-actions">
203
+ <button class="dialogue-btn primary" id="save-symbol-feedback">Add Feedback</button>
204
+ <button class="dialogue-btn secondary" id="cancel-symbol-feedback">Cancel</button>
205
+ </div>
206
+ </div>
207
+ </div>
208
+ </div>
209
+ </div>
210
+ </div>
211
+
212
+ <!-- Code Modal (Hidden by default) -->
213
+ <div class="code-modal" id="code-modal" style="display: none;">
214
+ <div class="code-modal-content">
215
+ <div class="code-modal-header">
216
+ <h4>Generated Code - Iteration ${this.currentIteration}</h4>
217
+ <button class="modal-close" id="close-code-modal">
218
+ <i data-feather="x"></i>
219
+ </button>
220
+ </div>
221
+ <div class="code-modal-body">
222
+ <div class="code-container">
223
+ <div class="code-gutter" id="code-gutter"></div>
224
+ <pre class="code-display selectable-code" id="code-display">${this.escapeHtml(this.currentCode)}</pre>
225
+ </div>
226
+ <div class="selection-info" id="selection-info">
227
+ <p>Select code to add specific feedback</p>
228
+ </div>
229
+
230
+ <!-- Dialogue Box for Code Feedback -->
231
+ <div class="dialogue-box" id="code-dialogue" style="display: none;">
232
+ <div class="dialogue-header">
233
+ <h6>Add Code Feedback</h6>
234
+ <button class="dialogue-close" id="close-code-dialogue">×</button>
235
+ </div>
236
+ <div class="dialogue-content">
237
+ <div class="highlighted-content">
238
+ <label>Selected Code:</label>
239
+ <div class="highlight-preview" id="code-highlight-preview"></div>
240
+ </div>
241
+ <div class="feedback-input">
242
+ <label>Your feedback:</label>
243
+ <textarea id="code-feedback-text" placeholder="Enter your feedback about this code..." rows="3"></textarea>
244
+ </div>
245
+ <div class="dialogue-actions">
246
+ <button class="dialogue-btn primary" id="save-code-feedback">Add Feedback</button>
247
+ <button class="dialogue-btn secondary" id="cancel-code-feedback">Cancel</button>
248
+ </div>
249
+ </div>
250
+ </div>
251
+ </div>
252
+ </div>
253
+ </div>
254
+ `;
255
+
256
+ // Insert panel into the body (overlay)
257
+ document.body.appendChild(this.feedbackPanel);
258
+
259
+ // Add event listeners
260
+ this.attachPanelEventListeners();
261
+
262
+ // Initialize feather icons
263
+ if (typeof feather !== 'undefined') {
264
+ feather.replace();
265
+ }
266
+
267
+ // Show panel with animation
268
+ setTimeout(() => {
269
+ this.feedbackPanel.classList.add('visible');
270
+ this.isVisible = true;
271
+ }, 10);
272
+ }
273
+
274
+ renderSymbolsJSON() {
275
+ if (!this.currentSymbols || Object.keys(this.currentSymbols).length === 0) {
276
+ return '<p class="no-symbols">No symbols extracted</p>';
277
+ }
278
+
279
+ const jsonString = JSON.stringify(this.currentSymbols, null, 2);
280
+ const truncatedJson = jsonString.length > 200 ? jsonString.substring(0, 200) + '\n ...\n}' : jsonString;
281
+
282
+ return `<pre class="symbols-json-preview selectable-json" title="Click to expand and highlight symbols">${this.escapeHtml(truncatedJson)}</pre>`;
283
+ }
284
+
285
+ attachPanelEventListeners() {
286
+ // Resize handle
287
+ document.getElementById('resize-handle').addEventListener('mousedown', (e) => {
288
+ this.startResize(e);
289
+ });
290
+
291
+ // Close button with confirmation
292
+ document.getElementById('feedback-close').addEventListener('click', () => {
293
+ this.confirmCloseFeedbackPanel();
294
+ });
295
+
296
+ // Expand symbols button
297
+ document.getElementById('expand-symbols').addEventListener('click', () => {
298
+ this.showSymbolsModal();
299
+ });
300
+
301
+ // Expand code button
302
+ document.getElementById('expand-code').addEventListener('click', () => {
303
+ this.showCodeModal();
304
+ });
305
+
306
+ // Add comment button
307
+ document.getElementById('add-comment').addEventListener('click', () => {
308
+ this.showCommentsSection();
309
+ });
310
+
311
+ // Finish button
312
+ document.getElementById('finish-here').addEventListener('click', () => {
313
+ this.submitFeedback();
314
+ });
315
+
316
+ // Comment actions
317
+ document.getElementById('save-comment').addEventListener('click', () => {
318
+ this.addGeneralComment();
319
+ });
320
+
321
+ document.getElementById('cancel-comment').addEventListener('click', () => {
322
+ this.hideCommentsSection();
323
+ document.getElementById('user-comments').value = '';
324
+ });
325
+
326
+ // Modal close buttons
327
+ document.getElementById('close-symbols-modal').addEventListener('click', () => {
328
+ this.hideSymbolsModal();
329
+ });
330
+
331
+ document.getElementById('close-code-modal').addEventListener('click', () => {
332
+ this.hideCodeModal();
333
+ });
334
+
335
+ // Click outside to close modals
336
+ document.getElementById('symbols-modal').addEventListener('click', (e) => {
337
+ if (e.target.id === 'symbols-modal') {
338
+ this.hideSymbolsModal();
339
+ }
340
+ });
341
+
342
+ document.getElementById('code-modal').addEventListener('click', (e) => {
343
+ if (e.target.id === 'code-modal') {
344
+ this.hideCodeModal();
345
+ }
346
+ });
347
+
348
+ // Dialogue close buttons
349
+ document.getElementById('close-symbol-dialogue')?.addEventListener('click', () => {
350
+ this.hideSymbolDialogue();
351
+ });
352
+
353
+ document.getElementById('close-code-dialogue')?.addEventListener('click', () => {
354
+ this.hideCodeDialogue();
355
+ });
356
+
357
+ // Dialogue action buttons
358
+ document.getElementById('save-symbol-feedback')?.addEventListener('click', () => {
359
+ this.saveSymbolFeedback();
360
+ });
361
+
362
+ document.getElementById('cancel-symbol-feedback')?.addEventListener('click', () => {
363
+ this.hideSymbolDialogue();
364
+ });
365
+
366
+ document.getElementById('save-code-feedback')?.addEventListener('click', () => {
367
+ this.saveCodeFeedback();
368
+ });
369
+
370
+ document.getElementById('cancel-code-feedback')?.addEventListener('click', () => {
371
+ this.hideCodeDialogue();
372
+ });
373
+
374
+ // Preview click handlers
375
+ document.querySelector('.hoverable-code').addEventListener('click', () => {
376
+ this.showCodeModal();
377
+ });
378
+
379
+ document.querySelector('.selectable-json')?.addEventListener('click', () => {
380
+ this.showSymbolsModal();
381
+ });
382
+ }
383
+
384
+ startResize(e) {
385
+ this.isResizing = true;
386
+ this.startX = e.clientX;
387
+ this.startWidth = this.sidebarWidth;
388
+
389
+ // Add visual feedback
390
+ document.body.style.cursor = 'ew-resize';
391
+ this.feedbackPanel.classList.add('resizing');
392
+
393
+ e.preventDefault();
394
+ }
395
+
396
+ handleMouseMove(e) {
397
+ if (!this.isResizing) return;
398
+
399
+ const deltaX = this.startX - e.clientX;
400
+ const newWidth = Math.max(this.minWidth, Math.min(this.maxWidth, this.startWidth + deltaX));
401
+
402
+ this.sidebarWidth = newWidth;
403
+ this.feedbackPanel.style.width = `${newWidth}px`;
404
+ }
405
+
406
+ handleMouseUp() {
407
+ if (!this.isResizing) return;
408
+
409
+ this.isResizing = false;
410
+ document.body.style.cursor = '';
411
+ this.feedbackPanel.classList.remove('resizing');
412
+ }
413
+
414
+ showSymbolsModal() {
415
+ const modal = document.getElementById('symbols-modal');
416
+ modal.style.display = 'flex';
417
+
418
+ // Initialize JSON selection
419
+ setTimeout(() => {
420
+ this.initializeJSONSelection();
421
+ }, 10);
422
+ }
423
+
424
+ hideSymbolsModal() {
425
+ const modal = document.getElementById('symbols-modal');
426
+ modal.style.display = 'none';
427
+ this.hideSymbolDialogue();
428
+ }
429
+
430
+ showCodeModal() {
431
+ const modal = document.getElementById('code-modal');
432
+ modal.style.display = 'flex';
433
+
434
+ // Add line numbers and initialize code selection
435
+ setTimeout(() => {
436
+ this.addLineNumbers();
437
+ this.initializeCodeSelection();
438
+ }, 10);
439
+ }
440
+
441
+ hideCodeModal() {
442
+ const modal = document.getElementById('code-modal');
443
+ modal.style.display = 'none';
444
+ this.hideCodeDialogue();
445
+ }
446
+
447
+ initializeJSONSelection() {
448
+ const jsonElement = document.getElementById('symbols-json');
449
+ if (jsonElement) {
450
+ jsonElement.addEventListener('mouseup', () => {
451
+ this.handleJSONSelection();
452
+ });
453
+ }
454
+ }
455
+
456
+ initializeCodeSelection() {
457
+ const codeDisplay = document.getElementById('code-display');
458
+ if (codeDisplay) {
459
+ codeDisplay.addEventListener('mouseup', () => {
460
+ this.handleCodeSelection();
461
+ });
462
+ }
463
+ }
464
+
465
+ handleJSONSelection() {
466
+ const selection = window.getSelection();
467
+ if (selection.rangeCount > 0 && !selection.isCollapsed) {
468
+ const selectedText = selection.toString().trim();
469
+ if (selectedText) {
470
+ this.showSymbolDialogue(selectedText);
471
+ }
472
+ }
473
+ }
474
+
475
+ handleCodeSelection() {
476
+ const selection = window.getSelection();
477
+ if (selection.rangeCount > 0 && !selection.isCollapsed) {
478
+ const selectedText = selection.toString().trim();
479
+ if (selectedText) {
480
+ this.showCodeDialogue(selectedText);
481
+ }
482
+ }
483
+ }
484
+
485
+ showSymbolDialogue(selectedText) {
486
+ const dialogue = document.getElementById('symbol-dialogue');
487
+ const preview = document.getElementById('symbol-highlight-preview');
488
+
489
+ preview.innerHTML = `<pre>${this.escapeHtml(selectedText)}</pre>`;
490
+ dialogue.style.display = 'block';
491
+
492
+ // Focus on textarea
493
+ document.getElementById('symbol-feedback-text').focus();
494
+
495
+ // Store selected text
496
+ this.currentSelection = {
497
+ type: 'symbol',
498
+ text: selectedText
499
+ };
500
+ }
501
+
502
+ showCodeDialogue(selectedText) {
503
+ const dialogue = document.getElementById('code-dialogue');
504
+ const preview = document.getElementById('code-highlight-preview');
505
+
506
+ preview.innerHTML = `<pre>${this.escapeHtml(selectedText)}</pre>`;
507
+ dialogue.style.display = 'block';
508
+
509
+ // Focus on textarea
510
+ document.getElementById('code-feedback-text').focus();
511
+
512
+ // Store selected text
513
+ this.currentSelection = {
514
+ type: 'code',
515
+ text: selectedText
516
+ };
517
+ }
518
+
519
+ hideSymbolDialogue() {
520
+ const dialogue = document.getElementById('symbol-dialogue');
521
+ dialogue.style.display = 'none';
522
+ document.getElementById('symbol-feedback-text').value = '';
523
+ window.getSelection().removeAllRanges();
524
+ }
525
+
526
+ hideCodeDialogue() {
527
+ const dialogue = document.getElementById('code-dialogue');
528
+ dialogue.style.display = 'none';
529
+ document.getElementById('code-feedback-text').value = '';
530
+ window.getSelection().removeAllRanges();
531
+ }
532
+
533
+ saveSymbolFeedback() {
534
+ const feedbackText = document.getElementById('symbol-feedback-text').value.trim();
535
+ if (feedbackText && this.currentSelection) {
536
+ this.addFeedbackItem('symbol', this.currentSelection.text, feedbackText);
537
+ this.hideSymbolDialogue();
538
+ this.showNotification('Symbol feedback added');
539
+ }
540
+ }
541
+
542
+ saveCodeFeedback() {
543
+ const feedbackText = document.getElementById('code-feedback-text').value.trim();
544
+ if (feedbackText && this.currentSelection) {
545
+ this.addFeedbackItem('code', this.currentSelection.text, feedbackText);
546
+ this.hideCodeDialogue();
547
+ this.showNotification('Code feedback added');
548
+ }
549
+ }
550
+
551
+ addGeneralComment() {
552
+ const comment = document.getElementById('user-comments').value.trim();
553
+ if (comment) {
554
+ this.addFeedbackItem('general', '', comment);
555
+ this.hideCommentsSection();
556
+ document.getElementById('user-comments').value = '';
557
+ this.showNotification('General comment added');
558
+ }
559
+ }
560
+
561
+ addFeedbackItem(type, selectedText, comment) {
562
+ const feedback = {
563
+ id: ++this.feedbackCounter,
564
+ type: type,
565
+ text: selectedText,
566
+ comment: comment,
567
+ timestamp: new Date().toLocaleTimeString()
568
+ };
569
+
570
+ this.selectedRanges.push(feedback);
571
+ this.updateFeedbackCart();
572
+ }
573
+
574
+ updateFeedbackCart() {
575
+ const cartItems = document.getElementById('cart-items');
576
+ const cartCount = document.getElementById('cart-count');
577
+
578
+ cartCount.textContent = `${this.selectedRanges.length} item${this.selectedRanges.length !== 1 ? 's' : ''}`;
579
+
580
+ if (this.selectedRanges.length === 0) {
581
+ cartItems.innerHTML = `
582
+ <div class="empty-cart">
583
+ <i data-feather="message-circle"></i>
584
+ <p>No feedback added yet</p>
585
+ <small>Highlight code or symbols to add feedback</small>
586
+ </div>
587
+ `;
588
+ if (typeof feather !== 'undefined') {
589
+ feather.replace();
590
+ }
591
+ return;
592
+ }
593
+
594
+ const items = this.selectedRanges.map(item => {
595
+ const typeIcon = item.type === 'code' ? 'code' : item.type === 'symbol' ? 'hash' : 'message-circle';
596
+ const typeLabel = item.type === 'code' ? 'Code' : item.type === 'symbol' ? 'Symbol' : 'General';
597
+ const preview = item.text ? (item.text.length > 50 ? item.text.substring(0, 50) + '...' : item.text) : '';
598
+
599
+ return `
600
+ <div class="cart-item" data-id="${item.id}">
601
+ <div class="cart-item-header">
602
+ <div class="cart-item-type">
603
+ <i data-feather="${typeIcon}"></i>
604
+ <span>${typeLabel}</span>
605
+ <small>${item.timestamp}</small>
606
+ </div>
607
+ <div class="cart-item-actions">
608
+ <button class="cart-action edit" onclick="window.interactiveFeedback.editFeedback(${item.id})" title="Edit">
609
+ <i data-feather="edit-2"></i>
610
+ </button>
611
+ <button class="cart-action remove" onclick="window.interactiveFeedback.removeFeedback(${item.id})" title="Remove">
612
+ <i data-feather="trash-2"></i>
613
+ </button>
614
+ </div>
615
+ </div>
616
+ ${preview ? `<div class="cart-item-preview">${this.escapeHtml(preview)}</div>` : ''}
617
+ <div class="cart-item-comment">${this.escapeHtml(item.comment)}</div>
618
+ </div>
619
+ `;
620
+ }).join('');
621
+
622
+ cartItems.innerHTML = items;
623
+
624
+ // Re-initialize feather icons
625
+ if (typeof feather !== 'undefined') {
626
+ feather.replace();
627
+ }
628
+ }
629
+
630
+ editFeedback(id) {
631
+ const feedback = this.selectedRanges.find(item => item.id === id);
632
+ if (!feedback) return;
633
+
634
+ const newComment = prompt(`Edit your feedback:\n\n${feedback.text ? 'Selected: ' + feedback.text + '\n\n' : ''}Current feedback:`, feedback.comment);
635
+ if (newComment !== null && newComment.trim() !== '') {
636
+ feedback.comment = newComment.trim();
637
+ this.updateFeedbackCart();
638
+ this.showNotification('Feedback updated');
639
+ }
640
+ }
641
+
642
+ removeFeedback(id) {
643
+ this.selectedRanges = this.selectedRanges.filter(item => item.id !== id);
644
+ this.updateFeedbackCart();
645
+ this.showNotification('Feedback removed');
646
+ }
647
+
648
+ showCommentsSection() {
649
+ const section = document.getElementById('comments-section');
650
+ section.style.display = 'block';
651
+ document.getElementById('user-comments').focus();
652
+ }
653
+
654
+ hideCommentsSection() {
655
+ const section = document.getElementById('comments-section');
656
+ section.style.display = 'none';
657
+ }
658
+
659
+
660
+
661
+ confirmCloseFeedbackPanel() {
662
+ const hasUnsavedFeedback = this.selectedRanges.length > 0;
663
+
664
+ let message = 'Are you sure you want to close the feedback panel?';
665
+ if (hasUnsavedFeedback) {
666
+ message += '\n\nYou have unsaved feedback that will be lost. The interactive session will not be able to continue without your feedback.';
667
+ } else {
668
+ message += '\n\nWithout providing feedback, the interactive session cannot continue.';
669
+ }
670
+
671
+ if (confirm(message)) {
672
+ this.hideFeedbackPanel();
673
+ }
674
+ }
675
+
676
+ hideFeedbackPanel() {
677
+ if (this.feedbackPanel) {
678
+ this.feedbackPanel.classList.remove('visible');
679
+ setTimeout(() => {
680
+ this.removeFeedbackPanel();
681
+ this.showRestoreButton();
682
+ }, 300);
683
+ }
684
+ }
685
+
686
+ showRestoreButton() {
687
+ // Remove existing restore button if any
688
+ this.removeRestoreButton();
689
+
690
+ // Create restore button in chat area
691
+ this.restoreButton = document.createElement('div');
692
+ this.restoreButton.className = 'feedback-restore-container';
693
+ this.restoreButton.innerHTML = `
694
+ <div class="feedback-restore-banner">
695
+ <div class="restore-actions">
696
+ <button class="btn-restore-feedback" id="restore-feedback-btn">
697
+ <i data-feather="edit-3"></i>
698
+ Continue Reviewing
699
+ </button>
700
+ <button class="btn-terminate-session" id="terminate-session-btn">
701
+ <i data-feather="check-circle"></i>
702
+ Finish Here
703
+ </button>
704
+ </div>
705
+ </div>
706
+ `;
707
+
708
+ // Add to chat container
709
+ const chatContainer = document.getElementById('chat-container') || document.getElementById('chatArea');
710
+ if (chatContainer) {
711
+ chatContainer.appendChild(this.restoreButton);
712
+ }
713
+
714
+ // Add event listeners
715
+ document.getElementById('restore-feedback-btn').addEventListener('click', () => {
716
+ this.restoreFeedbackPanel();
717
+ });
718
+
719
+ document.getElementById('terminate-session-btn').addEventListener('click', () => {
720
+ this.terminateInteractiveSession();
721
+ });
722
+
723
+ // Initialize feather icons
724
+ if (typeof feather !== 'undefined') {
725
+ feather.replace();
726
+ }
727
+ }
728
+
729
+ removeRestoreButton() {
730
+ if (this.restoreButton && document.body.contains(this.restoreButton)) {
731
+ this.restoreButton.remove();
732
+ }
733
+ this.restoreButton = null;
734
+ }
735
+
736
+ restoreFeedbackPanel() {
737
+ if (this.panelState) {
738
+ // Remove restore button
739
+ this.removeRestoreButton();
740
+
741
+ // Restore the panel with saved state
742
+ if (this.isMinimized && this.feedbackPanel) {
743
+ // Panel exists but is hidden, just show it
744
+ this.feedbackPanel.style.display = 'block';
745
+ this.isMinimized = false;
746
+ this.isVisible = true;
747
+ } else {
748
+ // Panel was completely removed, recreate it
749
+ this.showFeedbackPanel(this.panelState);
750
+ }
751
+
752
+ this.showNotification('Welcome back! Ready to continue reviewing the AI\'s work.');
753
+ }
754
+ }
755
+
756
+ terminateInteractiveSession() {
757
+ if (confirm('Are you sure you want to end the interactive session?\n\nThis will stop the AI from waiting for feedback and provide the current solution as final.')) {
758
+ // Remove restore button
759
+ this.removeRestoreButton();
760
+
761
+ // Send termination signal
762
+ import('../network/socket.js').then(({ socketManager }) => {
763
+ socketManager.send('terminate_session');
764
+ });
765
+
766
+ this.showNotification('Session ended. The AI will finalize the current solution.');
767
+ }
768
+ }
769
+
770
+ truncateCode(code) {
771
+ const lines = code.split('\n');
772
+ if (lines.length <= 8) {
773
+ return code;
774
+ }
775
+ return lines.slice(0, 8).join('\n') + '\n... (click to expand)';
776
+ }
777
+
778
+ formatCriticSummary(text) {
779
+ if (!text || text.trim() === '') {
780
+ return '<p class="no-issues">No issues found by AI critic.</p>';
781
+ }
782
+
783
+ // Extract first sentence or first 100 characters
784
+ const summary = text.length > 100 ? text.substring(0, 100) + '...' : text;
785
+ return `<p class="critic-summary-text">${this.escapeHtml(summary)}</p>`;
786
+ }
787
+
788
+ addLineNumbers() {
789
+ const codeDisplay = document.getElementById('code-display');
790
+ const codeGutter = document.getElementById('code-gutter');
791
+
792
+ if (codeDisplay && codeGutter) {
793
+ const lines = this.currentCode.split('\n');
794
+ const gutterHTML = lines.map((_, index) =>
795
+ `<div class="line-number" data-line="${index + 1}">${index + 1}</div>`
796
+ ).join('');
797
+
798
+ codeGutter.innerHTML = gutterHTML;
799
+ }
800
+ }
801
+
802
+ showNotification(message) {
803
+ const notification = document.createElement('div');
804
+ notification.className = 'feedback-notification';
805
+ notification.textContent = message;
806
+
807
+ document.body.appendChild(notification);
808
+
809
+ setTimeout(() => {
810
+ notification.classList.add('visible');
811
+ }, 10);
812
+
813
+ setTimeout(() => {
814
+ notification.classList.remove('visible');
815
+ setTimeout(() => {
816
+ if (document.body.contains(notification)) {
817
+ document.body.removeChild(notification);
818
+ }
819
+ }, 300);
820
+ }, 2000);
821
+ }
822
+
823
+ submitFeedback() {
824
+ const acceptCritic = document.getElementById('accept-critic').checked;
825
+
826
+ this.disableButtons();
827
+ this.showLoadingState('Submitting feedback...');
828
+
829
+ // Convert feedback to the expected format
830
+ const quotedRanges = this.selectedRanges.map(item => {
831
+ if (item.type === 'symbol') {
832
+ return {
833
+ text: `Symbol JSON: ${item.text}`,
834
+ comment: item.comment
835
+ };
836
+ } else if (item.type === 'code') {
837
+ return {
838
+ text: item.text,
839
+ comment: item.comment
840
+ };
841
+ } else {
842
+ return {
843
+ text: 'General Comment',
844
+ comment: item.comment
845
+ };
846
+ }
847
+ });
848
+
849
+ // Import socket manager and send feedback
850
+ import('../network/socket.js').then(({ socketManager }) => {
851
+ socketManager.send('provide_feedback', {
852
+ accept_critic: acceptCritic,
853
+ extra_comments: '',
854
+ quoted_ranges: quotedRanges,
855
+ terminate: false // Continue the process, don't terminate
856
+ });
857
+ });
858
+
859
+ // Clean up the panel completely after submitting feedback
860
+ this.removeFeedbackPanel();
861
+ this.removeRestoreButton();
862
+ }
863
+
864
+ disableButtons() {
865
+ const buttons = this.feedbackPanel.querySelectorAll('button');
866
+ buttons.forEach(btn => btn.disabled = true);
867
+ }
868
+
869
+ showLoadingState(message) {
870
+ // Show loading indicator in the sidebar
871
+ const content = this.feedbackPanel.querySelector('.feedback-sidebar-content');
872
+ if (content) {
873
+ content.innerHTML = `
874
+ <div class="loading-state">
875
+ <div class="loading-spinner"></div>
876
+ <p>${message}</p>
877
+ </div>
878
+ `;
879
+ }
880
+ }
881
+
882
+ removeFeedbackPanel() {
883
+ if (this.feedbackPanel && document.body.contains(this.feedbackPanel)) {
884
+ document.body.removeChild(this.feedbackPanel);
885
+ }
886
+ this.feedbackPanel = null;
887
+ this.isVisible = false;
888
+ }
889
+
890
+ showFinalArtifacts(data) {
891
+ // Show final artifacts in a compact way
892
+ const artifactsPanel = document.createElement('div');
893
+ artifactsPanel.className = 'final-artifacts-compact';
894
+ artifactsPanel.innerHTML = `
895
+ <div class="artifacts-header">
896
+ <h4>Final Solution</h4>
897
+ <button class="artifacts-close" onclick="this.parentElement.parentElement.remove()">
898
+ <i data-feather="x"></i>
899
+ </button>
900
+ </div>
901
+ <div class="artifacts-content">
902
+ <div class="artifacts-summary">
903
+ <p>Solution completed successfully!</p>
904
+ <button class="view-details-btn" onclick="this.nextElementSibling.style.display = this.nextElementSibling.style.display === 'none' ? 'block' : 'none'">
905
+ View Details
906
+ </button>
907
+ <div class="artifacts-details" style="display: none;">
908
+ <div class="artifact-section">
909
+ <h5>Final Code</h5>
910
+ <pre class="artifact-code">${this.escapeHtml(data.code || 'No code available')}</pre>
911
+ </div>
912
+ <div class="artifact-section">
913
+ <h5>Extracted Symbols</h5>
914
+ <pre class="artifact-json">${JSON.stringify(data.symbols || {}, null, 2)}</pre>
915
+ </div>
916
+ </div>
917
+ </div>
918
+ </div>
919
+ `;
920
+
921
+ // Add to chat area
922
+ const chatContainer = document.getElementById('chat-container');
923
+ if (chatContainer) {
924
+ chatContainer.appendChild(artifactsPanel);
925
+ }
926
+
927
+ // Initialize feather icons
928
+ if (typeof feather !== 'undefined') {
929
+ feather.replace();
930
+ }
931
+ }
932
+
933
+ handleModeSwitched(data) {
934
+ // Handle mode switching if needed
935
+ this.updateModeIndicator(data.mode);
936
+ }
937
+
938
+ updateModeIndicator(mode) {
939
+ // Update any mode indicators in the UI
940
+ const indicators = document.querySelectorAll('.mode-badge');
941
+ indicators.forEach(indicator => {
942
+ indicator.textContent = mode;
943
+ indicator.className = `mode-badge mode-${mode.toLowerCase()}`;
944
+ });
945
+ }
946
+
947
+ escapeHtml(text) {
948
+ const div = document.createElement('div');
949
+ div.textContent = text;
950
+ return div.innerHTML;
951
+ }
952
+ }
953
+
954
+ // Create global instance
955
+ window.interactiveFeedback = new InteractiveFeedback();
src/pips/static/js/ui/message-manager.js ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Message Manager - Handles chat messages, streaming, and code execution display
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { domManager } from './dom-manager.js';
6
+
7
+ export class MessageManager {
8
+ constructor() {
9
+ this.streamingMessages = new Map();
10
+ this.executionSpinners = new Map();
11
+ }
12
+
13
+ addMessage(sender, content, iteration = null, promptDetails = null) {
14
+ const messageDiv = document.createElement('div');
15
+ messageDiv.className = 'chat-message';
16
+
17
+ if (promptDetails) {
18
+ messageDiv.classList.add('expandable-message');
19
+ }
20
+
21
+ const avatarClass = sender === 'PIPS' || sender === 'PIPS System' ? 'avatar-pips' :
22
+ sender === 'AI Code Reviewer' ? 'avatar-reviewer' :
23
+ sender.includes('AI') ? 'avatar-llm' : 'avatar-system';
24
+ const avatarLetter = sender === 'PIPS' || sender === 'PIPS System' ? 'P' :
25
+ sender === 'AI Code Reviewer' ? 'QA' :
26
+ sender.includes('AI') ? 'AI' : 'S';
27
+
28
+ const iterationBadge = iteration ?
29
+ `<span class="iteration-badge">Iteration ${iteration}</span>` : '';
30
+
31
+ // Create expand toggle if prompt details are available
32
+ const expandToggle = promptDetails ? `
33
+ <button class="expand-toggle" onclick="window.pipsApp.toggleExpandMessage(this)">
34
+ <i data-feather="chevron-down" style="width: 12px; height: 12px;"></i>
35
+ Show Prompt
36
+ </button>
37
+ ` : '';
38
+
39
+ // Create expandable content if prompt details are available
40
+ const expandableContent = promptDetails ? `
41
+ <div class="expandable-content">
42
+ <div class="expandable-content-inner">
43
+ ${promptDetails.description ? `<div class="prompt-description">${this.escapeHtml(promptDetails.description)}</div>` : ''}
44
+ <div class="prompt-conversation">
45
+ ${promptDetails.conversation.map(msg => {
46
+ // Format content based on its structure
47
+ let formattedContent = '';
48
+ if (typeof msg.content === 'string') {
49
+ // Check if content looks like structured data or contains code blocks
50
+ if (msg.content.includes('```') || msg.content.includes('{') || msg.content.includes('[')) {
51
+ // Use markdown parsing for structured content
52
+ formattedContent = marked ? marked.parse(msg.content) : msg.content.replace(/\n/g, '<br>');
53
+ } else {
54
+ // Escape HTML but preserve line breaks for simple text
55
+ formattedContent = this.escapeHtml(msg.content).replace(/\n/g, '<br>');
56
+ }
57
+ } else if (Array.isArray(msg.content)) {
58
+ // Handle multimodal content (like image + text)
59
+ formattedContent = msg.content.map(item => {
60
+ if (item.type === 'text') {
61
+ return this.escapeHtml(item.text).replace(/\n/g, '<br>');
62
+ } else if (item.type === 'image_url') {
63
+ return '<div class="prompt-image">[Image content]</div>';
64
+ }
65
+ return this.escapeHtml(JSON.stringify(item));
66
+ }).join('');
67
+ } else {
68
+ // Fallback for other content types
69
+ formattedContent = this.escapeHtml(JSON.stringify(msg.content, null, 2)).replace(/\n/g, '<br>');
70
+ }
71
+
72
+ return `
73
+ <div class="prompt-message ${msg.role}">
74
+ <div class="prompt-role">${msg.role}</div>
75
+ <div class="prompt-content">${formattedContent}</div>
76
+ </div>
77
+ `;
78
+ }).join('')}
79
+ </div>
80
+ </div>
81
+ </div>
82
+ ` : '';
83
+
84
+ messageDiv.innerHTML = `
85
+ <div class="message-header">
86
+ <div class="message-avatar ${avatarClass}">${avatarLetter}</div>
87
+ <span class="message-sender">${this.escapeHtml(sender)}</span>
88
+ ${iterationBadge}
89
+ </div>
90
+ <div class="message-content">
91
+ ${marked ? marked.parse(content) : content}
92
+ ${expandToggle}
93
+ ${expandableContent}
94
+ </div>
95
+ `;
96
+
97
+ domManager.getElement('chatArea').appendChild(messageDiv);
98
+
99
+ // Re-highlight code blocks
100
+ if (typeof Prism !== 'undefined') {
101
+ Prism.highlightAll();
102
+ }
103
+
104
+ // Replace feather icons for the new expand toggle
105
+ if (promptDetails) {
106
+ feather.replace(messageDiv);
107
+ }
108
+
109
+ this.smartScrollToBottom();
110
+
111
+ // Save message incrementally during solving
112
+ this.saveMessageIncremental(sender, content, iteration, promptDetails);
113
+ }
114
+
115
+ displayFinalAnswer(answer) {
116
+ Logger.debug('MessageManager', 'displayFinalAnswer called with:', answer);
117
+
118
+ if (!answer || answer.trim() === '') {
119
+ Logger.warn('MessageManager', 'Empty or null final answer provided');
120
+ return;
121
+ }
122
+
123
+ // Remove any existing final answer elements to avoid duplicates
124
+ const existingAnswers = domManager.getElement('chatArea').querySelectorAll('.final-answer');
125
+ existingAnswers.forEach(el => el.remove());
126
+
127
+ const answerDiv = document.createElement('div');
128
+ answerDiv.className = 'final-answer';
129
+
130
+ if (typeof answer === 'string') {
131
+ if (answer.includes('<') && answer.includes('>')) {
132
+ answerDiv.innerHTML = answer;
133
+ } else {
134
+ answerDiv.textContent = answer;
135
+ }
136
+ } else {
137
+ answerDiv.textContent = String(answer);
138
+ }
139
+
140
+ domManager.getElement('chatArea').appendChild(answerDiv);
141
+
142
+ setTimeout(() => {
143
+ this.smartScrollToBottom();
144
+ }, 100);
145
+ }
146
+
147
+ smartScrollToBottom() {
148
+ const chatArea = domManager.getElement('chatArea');
149
+ const threshold = 100;
150
+ const shouldAutoScroll = (chatArea.scrollTop + chatArea.clientHeight >=
151
+ chatArea.scrollHeight - threshold);
152
+
153
+ if (shouldAutoScroll) {
154
+ chatArea.scrollTop = chatArea.scrollHeight;
155
+ }
156
+ }
157
+
158
+ escapeHtml(text) {
159
+ const div = document.createElement('div');
160
+ div.textContent = text;
161
+ return div.innerHTML;
162
+ }
163
+
164
+ // STREAMING MESSAGE METHODS
165
+ showAIThinkingIndicator(iteration, senderName = 'AI Assistant') {
166
+ // Remove any existing thinking indicator for this iteration and sender
167
+ this.removeAIThinkingIndicator(iteration, senderName);
168
+
169
+ const messageDiv = document.createElement('div');
170
+ messageDiv.className = 'chat-message ai-thinking';
171
+ messageDiv.setAttribute('data-iteration', iteration);
172
+ messageDiv.setAttribute('data-sender', senderName);
173
+
174
+ // Determine avatar based on sender
175
+ let avatarClass, avatarLetter, thinkingText;
176
+ if (senderName === 'AI Code Reviewer') {
177
+ avatarClass = 'avatar-reviewer';
178
+ avatarLetter = 'QA';
179
+ thinkingText = 'Code reviewer is analyzing...';
180
+ } else {
181
+ avatarClass = 'avatar-llm';
182
+ avatarLetter = 'AI';
183
+ thinkingText = 'AI is thinking...';
184
+ }
185
+
186
+ messageDiv.innerHTML = `
187
+ <div class="message-header">
188
+ <div class="message-avatar ${avatarClass}">${avatarLetter}</div>
189
+ <span class="message-sender">${senderName}</span>
190
+ ${iteration ? `<span class="iteration-badge">Iteration ${iteration}</span>` : ''}
191
+ </div>
192
+ <div class="message-content">
193
+ <div class="streaming-indicator">
194
+ <div class="spinner"></div>
195
+ <span>${thinkingText}</span>
196
+ </div>
197
+ </div>
198
+ `;
199
+
200
+ domManager.getElement('chatArea').appendChild(messageDiv);
201
+ this.smartScrollToBottom();
202
+ }
203
+
204
+ removeAIThinkingIndicator(iteration, senderName = null) {
205
+ const thinkingElements = domManager.getElement('chatArea').querySelectorAll('.ai-thinking');
206
+ thinkingElements.forEach(el => {
207
+ const matchesIteration = !iteration || el.getAttribute('data-iteration') == iteration;
208
+ const matchesSender = !senderName || el.getAttribute('data-sender') === senderName;
209
+
210
+ if (matchesIteration && matchesSender) {
211
+ el.remove();
212
+ }
213
+ });
214
+ }
215
+
216
+ updateStreamingMessage(token, iteration, sender) {
217
+ // Create a unique identifier for this streaming message based on iteration and sender
218
+ const streamingId = `${iteration}-${sender}`;
219
+
220
+ // Find or create streaming message
221
+ let streamingMessage = domManager.getElement('chatArea').querySelector(`[data-streaming-id="${streamingId}"]`);
222
+
223
+ if (!streamingMessage) {
224
+ // Remove thinking indicator if present for this specific sender
225
+ this.removeAIThinkingIndicator(iteration, sender);
226
+
227
+ // Create new streaming message
228
+ streamingMessage = document.createElement('div');
229
+ streamingMessage.className = 'chat-message streaming-message';
230
+ streamingMessage.setAttribute('data-streaming-iteration', iteration);
231
+ streamingMessage.setAttribute('data-streaming-id', streamingId);
232
+ streamingMessage.setAttribute('data-sender', sender);
233
+
234
+ // Determine avatar based on sender
235
+ let avatarClass, avatarLetter;
236
+ if (sender === 'AI Code Reviewer') {
237
+ avatarClass = 'avatar-reviewer';
238
+ avatarLetter = 'QA';
239
+ } else {
240
+ avatarClass = 'avatar-llm';
241
+ avatarLetter = 'AI';
242
+ }
243
+
244
+ streamingMessage.innerHTML = `
245
+ <div class="message-header">
246
+ <div class="message-avatar ${avatarClass}">${avatarLetter}</div>
247
+ <span class="message-sender">${sender}</span>
248
+ ${iteration ? `<span class="iteration-badge">Iteration ${iteration}</span>` : ''}
249
+ </div>
250
+ <div class="message-content">
251
+ <div class="streaming-text" data-content=""></div>
252
+ </div>
253
+ `;
254
+
255
+ domManager.getElement('chatArea').appendChild(streamingMessage);
256
+ }
257
+
258
+ // Update streaming content
259
+ const streamingText = streamingMessage.querySelector('.streaming-text');
260
+ const currentContent = streamingText.getAttribute('data-content') || '';
261
+ const newContent = currentContent + token;
262
+ streamingText.setAttribute('data-content', newContent);
263
+
264
+ // Remove any existing typing indicators first
265
+ const existingIndicators = streamingText.querySelectorAll('.typing-indicator');
266
+ existingIndicators.forEach(indicator => indicator.remove());
267
+
268
+ // Parse markdown if available
269
+ if (typeof marked !== 'undefined') {
270
+ streamingText.innerHTML = marked.parse(newContent);
271
+ } else {
272
+ streamingText.textContent = newContent;
273
+ }
274
+
275
+ // Add typing indicator at the very end of the content
276
+ const typingIndicator = document.createElement('span');
277
+ typingIndicator.className = 'typing-indicator';
278
+
279
+ // Find the last element in the streaming text and append the cursor inline
280
+ const lastElement = streamingText.lastElementChild;
281
+ if (lastElement && (lastElement.tagName === 'P' || lastElement.tagName === 'DIV' || lastElement.tagName === 'SPAN')) {
282
+ // Append to the last paragraph/div/span element to keep it inline
283
+ lastElement.appendChild(typingIndicator);
284
+ } else {
285
+ // If no suitable element found, append directly to streaming text
286
+ streamingText.appendChild(typingIndicator);
287
+ }
288
+
289
+ this.smartScrollToBottom();
290
+ }
291
+
292
+ finalizeStreamingMessage(iteration, sender = null) {
293
+ // If sender is specified, find the specific streaming message for that sender
294
+ // Otherwise, finalize all streaming messages for the iteration (backward compatibility)
295
+ let query;
296
+ if (sender) {
297
+ const streamingId = `${iteration}-${sender}`;
298
+ query = `[data-streaming-id="${streamingId}"]`;
299
+ } else {
300
+ query = `[data-streaming-iteration="${iteration}"]`;
301
+ }
302
+
303
+ const streamingMessages = domManager.getElement('chatArea').querySelectorAll(query);
304
+ streamingMessages.forEach(streamingMessage => {
305
+ // Remove typing indicator
306
+ const typingIndicator = streamingMessage.querySelector('.typing-indicator');
307
+ if (typingIndicator) {
308
+ typingIndicator.remove();
309
+ }
310
+
311
+ // Remove streaming attributes
312
+ streamingMessage.classList.remove('streaming-message');
313
+ streamingMessage.removeAttribute('data-streaming-iteration');
314
+ streamingMessage.removeAttribute('data-streaming-id');
315
+
316
+ // Re-highlight code blocks
317
+ if (typeof Prism !== 'undefined') {
318
+ Prism.highlightAll();
319
+ }
320
+ });
321
+ }
322
+
323
+ // CODE EXECUTION METHODS
324
+ showExecutionSpinner(iteration) {
325
+ // Remove any existing execution spinner for this iteration
326
+ this.removeExecutionSpinner(iteration);
327
+
328
+ const spinnerDiv = document.createElement('div');
329
+ spinnerDiv.className = 'execution-spinner';
330
+ spinnerDiv.setAttribute('data-execution-iteration', iteration);
331
+ spinnerDiv.innerHTML = `
332
+ <div class="spinner"></div>
333
+ <span>Executing code...</span>
334
+ `;
335
+
336
+ domManager.getElement('chatArea').appendChild(spinnerDiv);
337
+ this.smartScrollToBottom();
338
+ }
339
+
340
+ removeExecutionSpinner(iteration) {
341
+ const spinners = domManager.getElement('chatArea').querySelectorAll('.execution-spinner');
342
+ spinners.forEach(spinner => {
343
+ if (!iteration || spinner.getAttribute('data-execution-iteration') == iteration) {
344
+ spinner.remove();
345
+ }
346
+ });
347
+ }
348
+
349
+ displayExecutionResult(result, iteration, isError = false) {
350
+ const resultDiv = document.createElement('div');
351
+ resultDiv.className = `execution-result ${isError ? 'error' : ''}`;
352
+ resultDiv.textContent = result;
353
+
354
+ domManager.getElement('chatArea').appendChild(resultDiv);
355
+ this.smartScrollToBottom();
356
+ }
357
+
358
+ displayCode(code, iteration) {
359
+ const codeDiv = document.createElement('div');
360
+ codeDiv.className = 'code-block';
361
+ codeDiv.innerHTML = `<pre><code class="language-python">${this.escapeHtml(code)}</code></pre>`;
362
+
363
+ domManager.getElement('chatArea').appendChild(codeDiv);
364
+
365
+ if (typeof Prism !== 'undefined') {
366
+ Prism.highlightAll();
367
+ }
368
+
369
+ this.smartScrollToBottom();
370
+ }
371
+
372
+ toggleExpandMessage(button) {
373
+ const expandToggle = button;
374
+ const messageContent = button.closest('.message-content');
375
+ const expandableContent = messageContent.querySelector('.expandable-content');
376
+
377
+ if (!expandableContent) return;
378
+
379
+ const isExpanded = expandableContent.classList.contains('expanded');
380
+
381
+ if (isExpanded) {
382
+ expandableContent.classList.remove('expanded');
383
+ expandToggle.classList.remove('expanded');
384
+ expandToggle.innerHTML = `
385
+ <i data-feather="chevron-down" style="width: 12px; height: 12px;"></i>
386
+ Show Prompt
387
+ `;
388
+ } else {
389
+ expandableContent.classList.add('expanded');
390
+ expandToggle.classList.add('expanded');
391
+ expandToggle.innerHTML = `
392
+ <i data-feather="chevron-up" style="width: 12px; height: 12px;"></i>
393
+ Hide Prompt
394
+ `;
395
+ }
396
+
397
+ // Replace feather icons
398
+ feather.replace(expandToggle);
399
+
400
+ // Scroll to keep the message in view if needed
401
+ setTimeout(() => {
402
+ if (!isExpanded) {
403
+ this.smartScrollToBottom();
404
+ }
405
+ }, 300);
406
+ }
407
+
408
+ downloadChat() {
409
+ const chatContent = domManager.getElement('chatArea').innerHTML;
410
+ const blob = new Blob([`
411
+ <!DOCTYPE html>
412
+ <html>
413
+ <head>
414
+ <title>PIPS Chat Export</title>
415
+ <style>
416
+ body { font-family: Arial, sans-serif; margin: 20px; }
417
+ .chat-message { margin-bottom: 20px; }
418
+ .message-header { font-weight: bold; margin-bottom: 5px; }
419
+ .message-content { margin-left: 20px; }
420
+ </style>
421
+ </head>
422
+ <body>
423
+ <h1>PIPS Chat Export</h1>
424
+ <div class="chat-area">${chatContent}</div>
425
+ </body>
426
+ </html>
427
+ `], { type: 'text/html' });
428
+
429
+ const url = URL.createObjectURL(blob);
430
+ const a = document.createElement('a');
431
+ a.href = url;
432
+ a.download = `pips_chat_${new Date().toISOString().split('T')[0]}.html`;
433
+ document.body.appendChild(a);
434
+ a.click();
435
+ document.body.removeChild(a);
436
+ URL.revokeObjectURL(url);
437
+ }
438
+
439
+ // SESSION MANAGEMENT METHODS
440
+ getCurrentChatHistory() {
441
+ const chatArea = domManager.getElement('chatArea');
442
+ if (!chatArea) {
443
+ Logger.warn('MessageManager', 'Chat area not found');
444
+ return [];
445
+ }
446
+
447
+ const messages = chatArea.querySelectorAll('.chat-message');
448
+ const history = [];
449
+
450
+ messages.forEach(message => {
451
+ const senderElement = message.querySelector('.message-sender');
452
+ const contentElement = message.querySelector('.message-content');
453
+ const iterationElement = message.querySelector('.iteration-badge');
454
+
455
+ if (!senderElement || !contentElement) {
456
+ Logger.debug('MessageManager', 'Skipping malformed message');
457
+ return; // Skip malformed messages
458
+ }
459
+
460
+ const sender = senderElement.textContent || 'Unknown';
461
+ let content = '';
462
+
463
+ // Get content - extract only the main content, excluding expandable elements
464
+ let contentToSave = '';
465
+ const contentChildren = Array.from(contentElement.children);
466
+
467
+ // Look for the main content, excluding expand toggles and expandable content
468
+ contentChildren.forEach(child => {
469
+ if (!child.classList.contains('expand-toggle') &&
470
+ !child.classList.contains('expandable-content')) {
471
+ contentToSave += child.outerHTML;
472
+ }
473
+ });
474
+
475
+ // If no child elements found, get direct text content
476
+ if (!contentToSave) {
477
+ // Get text nodes directly, excluding expand button text
478
+ const clonedContent = contentElement.cloneNode(true);
479
+ const expandToggle = clonedContent.querySelector('.expand-toggle');
480
+ const expandableContent = clonedContent.querySelector('.expandable-content');
481
+ if (expandToggle) expandToggle.remove();
482
+ if (expandableContent) expandableContent.remove();
483
+ contentToSave = clonedContent.innerHTML.trim() || clonedContent.textContent.trim();
484
+ }
485
+
486
+ content = contentToSave;
487
+
488
+ const iteration = iterationElement ? iterationElement.textContent : null;
489
+
490
+ // Skip the welcome message
491
+ if (sender === 'PIPS System' && content.includes('Welcome to PIPS')) {
492
+ return;
493
+ }
494
+
495
+ // Skip empty messages but be more specific about what to filter
496
+ if (!content || content === '') {
497
+ Logger.debug('MessageManager', 'Skipping empty message');
498
+ return;
499
+ }
500
+
501
+ // Skip only currently active streaming indicators (not completed messages that might have streaming classes)
502
+ if (message.classList.contains('ai-thinking') ||
503
+ message.classList.contains('streaming-message') ||
504
+ content.includes('AI is thinking...') ||
505
+ content.includes('Executing code...')) {
506
+ Logger.debug('MessageManager', 'Skipping active streaming indicator');
507
+ return;
508
+ }
509
+
510
+ // Check if this message has prompt details
511
+ const expandableContent = message.querySelector('.expandable-content');
512
+ let promptDetails = null;
513
+
514
+ if (expandableContent) {
515
+ // Extract prompt details from the DOM
516
+ const promptDescription = expandableContent.querySelector('.prompt-description');
517
+ const promptMessages = expandableContent.querySelectorAll('.prompt-message');
518
+
519
+ if (promptMessages.length > 0) {
520
+ promptDetails = {
521
+ description: promptDescription ? promptDescription.textContent : '',
522
+ conversation: Array.from(promptMessages).map(promptMsg => ({
523
+ role: promptMsg.querySelector('.prompt-role').textContent.toLowerCase(),
524
+ content: promptMsg.querySelector('.prompt-content').textContent
525
+ }))
526
+ };
527
+ }
528
+ }
529
+
530
+ history.push({
531
+ sender,
532
+ content,
533
+ iteration,
534
+ promptDetails,
535
+ timestamp: new Date().toISOString()
536
+ });
537
+ });
538
+
539
+ Logger.debug('MessageManager', `Extracted ${history.length} messages from chat`);
540
+ return history;
541
+ }
542
+
543
+ loadChatHistory(history) {
544
+ const chatArea = domManager.getElement('chatArea');
545
+
546
+ // Find and preserve the welcome message first
547
+ let welcomeMessage = null;
548
+ const existingMessages = chatArea.querySelectorAll('.chat-message');
549
+ existingMessages.forEach(msg => {
550
+ const sender = msg.querySelector('.message-sender');
551
+ const content = msg.querySelector('.message-content');
552
+ if (sender && content &&
553
+ sender.textContent === 'PIPS System' &&
554
+ content.textContent.includes('Welcome to PIPS')) {
555
+ welcomeMessage = msg.cloneNode(true);
556
+ }
557
+ });
558
+
559
+ // Clear existing messages
560
+ chatArea.innerHTML = '';
561
+
562
+ // Restore welcome message if it existed
563
+ if (welcomeMessage) {
564
+ chatArea.appendChild(welcomeMessage);
565
+ }
566
+
567
+ // Load messages from history
568
+ if (history && history.length > 0) {
569
+ Logger.debug('MessageManager', `Loading ${history.length} messages from history`);
570
+
571
+ history.forEach((msg, index) => {
572
+ if (!msg || !msg.sender || !msg.content) {
573
+ Logger.warn('MessageManager', `Skipping invalid message at index ${index}:`, msg);
574
+ return;
575
+ }
576
+
577
+ const messageDiv = document.createElement('div');
578
+ messageDiv.className = 'chat-message';
579
+
580
+ const avatarClass = msg.sender === 'PIPS' || msg.sender === 'PIPS System' ? 'avatar-pips' :
581
+ msg.sender === 'AI Code Reviewer' ? 'avatar-reviewer' :
582
+ msg.sender.includes('AI') ? 'avatar-llm' : 'avatar-system';
583
+ const avatarLetter = msg.sender === 'PIPS' || msg.sender === 'PIPS System' ? 'P' :
584
+ msg.sender === 'AI Code Reviewer' ? 'QA' :
585
+ msg.sender.includes('AI') ? 'AI' : 'S';
586
+
587
+ const iterationBadge = msg.iteration ?
588
+ `<span class="iteration-badge">${this.escapeHtml(msg.iteration)}</span>` : '';
589
+
590
+ // Handle expandable content for loaded messages
591
+ const expandToggle = msg.promptDetails ? `
592
+ <button class="expand-toggle" onclick="window.pipsApp.toggleExpandMessage(this)">
593
+ <i data-feather="chevron-down" style="width: 12px; height: 12px;"></i>
594
+ Show Prompt
595
+ </button>
596
+ ` : '';
597
+
598
+ const expandableContent = msg.promptDetails ? `
599
+ <div class="expandable-content">
600
+ <div class="expandable-content-inner">
601
+ ${msg.promptDetails.description ? `<div class="prompt-description">${this.escapeHtml(msg.promptDetails.description)}</div>` : ''}
602
+ <div class="prompt-conversation">
603
+ ${msg.promptDetails.conversation.map(promptMsg => `
604
+ <div class="prompt-message ${promptMsg.role}">
605
+ <div class="prompt-role">${promptMsg.role}</div>
606
+ <div class="prompt-content">${this.escapeHtml(promptMsg.content)}</div>
607
+ </div>
608
+ `).join('')}
609
+ </div>
610
+ </div>
611
+ </div>
612
+ ` : '';
613
+
614
+ if (msg.promptDetails) {
615
+ messageDiv.classList.add('expandable-message');
616
+ }
617
+
618
+ messageDiv.innerHTML = `
619
+ <div class="message-header">
620
+ <div class="message-avatar ${avatarClass}">${avatarLetter}</div>
621
+ <span class="message-sender">${this.escapeHtml(msg.sender)}</span>
622
+ ${iterationBadge}
623
+ </div>
624
+ <div class="message-content">
625
+ ${msg.content}
626
+ ${expandToggle}
627
+ ${expandableContent}
628
+ </div>
629
+ `;
630
+
631
+ chatArea.appendChild(messageDiv);
632
+ });
633
+
634
+ // Replace feather icons for any expandable messages
635
+ if (typeof feather !== 'undefined') {
636
+ feather.replace(chatArea);
637
+ }
638
+ } else {
639
+ Logger.debug('MessageManager', 'No chat history to load');
640
+ }
641
+
642
+ // Re-highlight code blocks
643
+ if (typeof Prism !== 'undefined') {
644
+ Prism.highlightAll();
645
+ }
646
+
647
+ this.smartScrollToBottom();
648
+ }
649
+
650
+ clearChatAndRestoreWelcome() {
651
+ const chatArea = domManager.getElement('chatArea');
652
+ chatArea.innerHTML = '';
653
+
654
+ // Add fresh welcome message
655
+ const welcomeDiv = document.createElement('div');
656
+ welcomeDiv.className = 'chat-message';
657
+ welcomeDiv.innerHTML = `
658
+ <div class="message-header">
659
+ <div class="message-avatar avatar-pips">P</div>
660
+ <span class="message-sender">PIPS System</span>
661
+ </div>
662
+ <div class="message-content">
663
+ Welcome to PIPS! Enter a problem in the left panel and click "Solve Problem" to get started.
664
+ Don't forget to configure your model settings first.
665
+ </div>
666
+ `;
667
+ chatArea.appendChild(welcomeDiv);
668
+ }
669
+
670
+ // CLEANUP METHODS - for handling session interruptions and failures
671
+ cleanupAllActiveIndicators() {
672
+ Logger.debug('MessageManager', 'Cleaning up all active indicators');
673
+
674
+ // Remove all AI thinking indicators
675
+ const thinkingElements = domManager.getElement('chatArea').querySelectorAll('.ai-thinking');
676
+ thinkingElements.forEach(el => el.remove());
677
+
678
+ // Remove all execution spinners
679
+ const executionSpinners = domManager.getElement('chatArea').querySelectorAll('.execution-spinner');
680
+ executionSpinners.forEach(el => el.remove());
681
+
682
+ // Finalize all streaming messages
683
+ const streamingMessages = domManager.getElement('chatArea').querySelectorAll('.streaming-message');
684
+ streamingMessages.forEach(streamingMessage => {
685
+ // Remove typing indicator
686
+ const typingIndicator = streamingMessage.querySelector('.typing-indicator');
687
+ if (typingIndicator) {
688
+ typingIndicator.remove();
689
+ }
690
+
691
+ // Remove streaming attributes
692
+ streamingMessage.classList.remove('streaming-message');
693
+ streamingMessage.removeAttribute('data-streaming-iteration');
694
+ streamingMessage.removeAttribute('data-streaming-id');
695
+ });
696
+
697
+ // Re-highlight code blocks after cleanup
698
+ if (typeof Prism !== 'undefined') {
699
+ Prism.highlightAll();
700
+ }
701
+
702
+ Logger.debug('MessageManager', 'All active indicators cleaned up');
703
+ }
704
+
705
+ // For incremental saving during solving - save messages as they come in
706
+ saveMessageIncremental(sender, content, iteration = null, promptDetails = null) {
707
+ // This is called after each message is added to save it incrementally
708
+ // Import sessionManager to avoid circular dependency
709
+ import('./session-manager.js').then(({ sessionManager }) => {
710
+ if (window.appState && window.appState.currentSessionData) {
711
+ // Update chat history with current messages
712
+ window.appState.currentSessionData.chatHistory = this.getCurrentChatHistory();
713
+ window.appState.currentSessionData.lastUsed = new Date().toISOString();
714
+
715
+ // Save to storage incrementally
716
+ sessionManager.saveCurrentSessionToStorage();
717
+
718
+ Logger.debug('MessageManager', `Incrementally saved message from ${sender} to session`);
719
+ }
720
+ }).catch(err => {
721
+ Logger.warn('MessageManager', 'Could not save message incrementally:', err);
722
+ });
723
+ }
724
+ }
725
+
726
+ // Create singleton instance
727
+ export const messageManager = new MessageManager();
src/pips/static/js/ui/session-manager.js ADDED
@@ -0,0 +1,1451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Session Manager - Handles session UI and management functionality
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { appState } from '../core/state.js';
6
+ import { storageManager } from '../core/storage.js';
7
+ import { domManager } from './dom-manager.js';
8
+ import { messageManager } from './message-manager.js';
9
+ import { imageHandler } from './image-handler.js';
10
+
11
+ export class SessionManager {
12
+ constructor() {
13
+ this.isInitialized = false;
14
+ this.periodicSaveInterval = null;
15
+ }
16
+
17
+ initialize() {
18
+ if (this.isInitialized) return;
19
+
20
+ // Clean up ghost sessions on startup
21
+ this.cleanupGhostSessions();
22
+
23
+ this.setupEventListeners();
24
+ this.refreshSessionsList();
25
+ this.isInitialized = true;
26
+
27
+ Logger.debug('Session', 'Session manager initialized');
28
+ }
29
+
30
+ setupEventListeners() {
31
+ // Session management listeners
32
+ domManager.getElement('newSessionBtn')?.addEventListener('click', () => this.startNewSession());
33
+ domManager.getElement('sessionsToggle')?.addEventListener('click', () => this.toggleSessions());
34
+ domManager.getElement('clearSessionsBtn')?.addEventListener('click', () => this.clearAllSessionsEnhanced());
35
+ domManager.getElement('exportSessionsBtn')?.addEventListener('click', () => this.exportSessions());
36
+ domManager.getElement('importSessionsBtn')?.addEventListener('click', () => this.triggerImportSessions());
37
+
38
+ // Import file input handler
39
+ domManager.getElement('importSessionsInput')?.addEventListener('change', (e) => this.handleImportFile(e));
40
+
41
+ // Session header click
42
+ document.querySelector('.sessions-header')?.addEventListener('click', () => {
43
+ document.getElementById('sessionsToggle')?.click();
44
+ });
45
+
46
+ Logger.debug('Session', 'Event listeners set up');
47
+ }
48
+
49
+ startNewSession() {
50
+ Logger.debug('Session', 'Start New Session button clicked');
51
+ this.resetToNewSessionState();
52
+ domManager.updateStatus('Ready to start a new session', 'success');
53
+ }
54
+
55
+ resetToNewSessionState() {
56
+ console.log('[DEBUG] Resetting to new session state');
57
+
58
+ // Save current session before resetting if we have one
59
+ if (appState.currentSessionData) {
60
+ console.log('[DEBUG] Saving current session before reset');
61
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
62
+ // Update the current state
63
+ appState.currentSessionData.problemText = domManager.getElement('questionInput')?.value.trim() || '';
64
+ const imageElement = domManager.getElement('imagePreview');
65
+ appState.currentSessionData.image = imageElement?.style.display !== 'none' ? imageElement.src : null;
66
+ appState.currentSessionData.title = this.generateSessionTitle(appState.currentSessionData.problemText);
67
+ this.saveCurrentSessionToStorage();
68
+ }
69
+
70
+ // Reset session management state
71
+ appState.selectedSessionId = null;
72
+ appState.currentSessionData = null;
73
+
74
+ // Clear visual selection
75
+ document.querySelectorAll('.session-item').forEach(item => {
76
+ item.classList.remove('selected');
77
+ });
78
+
79
+ // Clear inputs and make them editable
80
+ this.clearAndEnableInputs();
81
+
82
+ // Clear chat and restore welcome message properly
83
+ messageManager.clearChatAndRestoreWelcome();
84
+
85
+ // Clear any existing feedback panels from previous sessions
86
+ if (window.interactiveFeedback) {
87
+ window.interactiveFeedback.removeFeedbackPanel();
88
+ window.interactiveFeedback.removeRestoreButton();
89
+ }
90
+
91
+ // Clear any final solution artifacts panels
92
+ document.querySelectorAll('.final-artifacts-compact').forEach(panel => {
93
+ panel.remove();
94
+ });
95
+
96
+ // Clear per-session custom rules
97
+ import('./settings-manager.js').then(({ settingsManager }) => {
98
+ settingsManager.clearPerSessionRules();
99
+ });
100
+
101
+ this.updateCurrentSessionDisplay();
102
+ console.log('[DEBUG] Reset to new session state completed');
103
+ }
104
+
105
+ clearAndEnableInputs() {
106
+ // Clear inputs
107
+ domManager.clearInputs();
108
+
109
+ // Enable and reset input field to editable state
110
+ const questionInputElement = domManager.getElement('questionInput');
111
+ const solveBtnElement = domManager.getElement('solveBtn');
112
+
113
+ if (questionInputElement) {
114
+ questionInputElement.disabled = false;
115
+ questionInputElement.style.backgroundColor = '';
116
+ questionInputElement.style.cursor = '';
117
+ questionInputElement.title = '';
118
+ questionInputElement.placeholder = "Enter your problem here... (e.g., 'What is the square root of 144?', 'Solve this math puzzle', etc.)";
119
+ }
120
+
121
+ if (solveBtnElement && !appState.isSolving) {
122
+ solveBtnElement.style.display = 'inline-flex';
123
+ solveBtnElement.disabled = false;
124
+ solveBtnElement.title = '';
125
+ }
126
+
127
+ // Remove any read-only messages
128
+ this.removeReadOnlyMessage();
129
+
130
+ // Replace feather icons
131
+ if (typeof feather !== 'undefined') {
132
+ feather.replace();
133
+ }
134
+ }
135
+
136
+ setInputsReadOnly(reason = 'This session has been used and is now read-only') {
137
+ const questionInputElement = domManager.getElement('questionInput');
138
+ const solveBtnElement = domManager.getElement('solveBtn');
139
+
140
+ if (questionInputElement) {
141
+ questionInputElement.disabled = true;
142
+ questionInputElement.style.backgroundColor = 'var(--gray-100)';
143
+ questionInputElement.style.cursor = 'not-allowed';
144
+ questionInputElement.title = reason;
145
+ questionInputElement.placeholder = 'This session is read-only. Start a new session to solve another problem.';
146
+ }
147
+
148
+ if (solveBtnElement) {
149
+ solveBtnElement.style.display = 'none';
150
+ solveBtnElement.disabled = true;
151
+ }
152
+
153
+ // Add read-only message
154
+ this.showReadOnlyMessage();
155
+ }
156
+
157
+ showReadOnlyMessage() {
158
+ // Remove any existing message first
159
+ this.removeReadOnlyMessage();
160
+
161
+ const messageEl = document.createElement('div');
162
+ messageEl.className = 'session-readonly-message';
163
+ messageEl.style.cssText = `
164
+ background: var(--warning-50);
165
+ border: 1px solid var(--warning-200);
166
+ border-radius: 8px;
167
+ padding: 12px;
168
+ margin-top: 8px;
169
+ font-size: 13px;
170
+ color: var(--warning-700);
171
+ text-align: center;
172
+ `;
173
+ messageEl.innerHTML = `
174
+ <i data-feather="info" style="width: 14px; height: 14px; margin-right: 6px;"></i>
175
+ This session is read-only. Click "Start New Session" to solve a new problem.
176
+ `;
177
+
178
+ // Add message after button group
179
+ const buttonGroup = document.querySelector('.button-group');
180
+ if (buttonGroup) {
181
+ buttonGroup.insertAdjacentElement('afterend', messageEl);
182
+
183
+ if (typeof feather !== 'undefined') {
184
+ feather.replace(messageEl);
185
+ }
186
+ }
187
+ }
188
+
189
+ removeReadOnlyMessage() {
190
+ const message = document.querySelector('.session-readonly-message');
191
+ if (message) {
192
+ message.remove();
193
+ }
194
+ }
195
+
196
+ isSessionUsed(session) {
197
+ // A session is considered "used" (read-only) only if it has been
198
+ // finished or explicitly interrupted. This mirrors the logic that
199
+ // lives in the inline implementation inside index.html. Active or
200
+ // in-progress ("solving") sessions remain editable even if they have
201
+ // chat history.
202
+ const readOnlyStatuses = ['completed', 'interrupted'];
203
+ return readOnlyStatuses.includes(session?.status);
204
+ }
205
+
206
+ toggleSessions() {
207
+ appState.sessionsExpanded = !appState.sessionsExpanded;
208
+
209
+ const sessionsContainer = domManager.getElement('sessionsContainer');
210
+ const sessionsToggle = domManager.getElement('sessionsToggle');
211
+
212
+ if (appState.sessionsExpanded) {
213
+ sessionsContainer?.classList.add('expanded');
214
+ sessionsToggle?.classList.add('expanded');
215
+ } else {
216
+ sessionsContainer?.classList.remove('expanded');
217
+ sessionsToggle?.classList.remove('expanded');
218
+ }
219
+
220
+ Logger.debug('Session', `Sessions panel ${appState.sessionsExpanded ? 'expanded' : 'collapsed'}`);
221
+ }
222
+
223
+ clearAllSessions() {
224
+ if (confirm('Are you sure you want to clear all session history? This cannot be undone.')) {
225
+ try {
226
+ storageManager.clearAllSessions();
227
+ this.refreshSessionsList();
228
+ domManager.updateStatus('All sessions cleared', 'success');
229
+ Logger.debug('Session', 'All sessions cleared by user');
230
+ } catch (error) {
231
+ Logger.error('Session', 'Error clearing sessions:', error);
232
+ domManager.updateStatus('Error clearing sessions', 'error');
233
+ }
234
+ }
235
+ }
236
+
237
+ exportSessions() {
238
+ try {
239
+ const result = storageManager.exportSessions();
240
+ if (result) {
241
+ const sessions = storageManager.loadSessions();
242
+ const defaultSessionIds = storageManager.getDefaultSessionIds();
243
+ const userSessionCount = Object.keys(sessions).length - defaultSessionIds.length;
244
+
245
+ if (userSessionCount > 0) {
246
+ domManager.updateStatus(`Exported ${userSessionCount} user session(s) successfully`, 'success');
247
+ } else {
248
+ domManager.updateStatus('No user sessions to export (default sessions are excluded)', 'info');
249
+ }
250
+ Logger.debug('Session', `Sessions exported by user: ${userSessionCount} user sessions`);
251
+ } else {
252
+ domManager.updateStatus('Error exporting sessions', 'error');
253
+ }
254
+ } catch (error) {
255
+ Logger.error('Session', 'Error exporting sessions:', error);
256
+ domManager.updateStatus('Error exporting sessions', 'error');
257
+ }
258
+ }
259
+
260
+ triggerImportSessions() {
261
+ const fileInput = domManager.getElement('importSessionsInput');
262
+ if (fileInput) {
263
+ fileInput.click();
264
+ }
265
+ }
266
+
267
+ async handleImportFile(event) {
268
+ const file = event.target.files[0];
269
+ if (!file) {
270
+ return;
271
+ }
272
+
273
+ // Validate file type
274
+ if (!file.name.endsWith('.json')) {
275
+ domManager.updateStatus('Please select a JSON file', 'warning');
276
+ return;
277
+ }
278
+
279
+ try {
280
+ domManager.updateStatus('Importing sessions...', 'info');
281
+
282
+ // Read file content
283
+ const fileContent = await this.readFileAsText(file);
284
+
285
+ // Import sessions with merge enabled, no duplicates overwrite by default
286
+ const result = await storageManager.importSessions(fileContent, {
287
+ merge: true,
288
+ overwriteDuplicates: false
289
+ });
290
+
291
+ // Handle results
292
+ if (result.imported > 0) {
293
+ this.refreshSessionsList();
294
+
295
+ let message = `Successfully imported ${result.imported} session(s)`;
296
+ if (result.skipped > 0) {
297
+ message += ` (${result.skipped} skipped due to duplicates)`;
298
+ }
299
+
300
+ domManager.updateStatus(message, 'success');
301
+ Logger.debug('Session', `Import completed: ${result.imported} imported, ${result.skipped} skipped`);
302
+
303
+ // Show detailed summary if there were duplicates
304
+ if (result.duplicates > 0) {
305
+ const shouldOverwrite = confirm(
306
+ `Found ${result.duplicates} duplicate session(s). ` +
307
+ `Would you like to overwrite them with the imported versions?`
308
+ );
309
+
310
+ if (shouldOverwrite) {
311
+ const overwriteResult = await storageManager.importSessions(fileContent, {
312
+ merge: true,
313
+ overwriteDuplicates: true
314
+ });
315
+
316
+ this.refreshSessionsList();
317
+ domManager.updateStatus(
318
+ `Import completed: ${overwriteResult.imported} sessions imported (including overwrites)`,
319
+ 'success'
320
+ );
321
+ }
322
+ }
323
+ } else if (result.skipped > 0) {
324
+ domManager.updateStatus('No new sessions imported - all sessions already exist', 'warning');
325
+ } else {
326
+ domManager.updateStatus('No valid sessions found in file', 'warning');
327
+ }
328
+
329
+ } catch (error) {
330
+ Logger.error('Session', 'Error importing sessions:', error);
331
+
332
+ let errorMessage = 'Error importing sessions';
333
+ if (error.message.includes('Invalid import data')) {
334
+ errorMessage = 'Invalid file format - please select a valid PIPS session export file';
335
+ } else if (error.message.includes('JSON')) {
336
+ errorMessage = 'Invalid JSON file format';
337
+ }
338
+
339
+ domManager.updateStatus(errorMessage, 'error');
340
+ } finally {
341
+ // Clear the file input
342
+ event.target.value = '';
343
+ }
344
+ }
345
+
346
+ readFileAsText(file) {
347
+ return new Promise((resolve, reject) => {
348
+ const reader = new FileReader();
349
+ reader.onload = (e) => resolve(e.target.result);
350
+ reader.onerror = (e) => reject(new Error('Failed to read file'));
351
+ reader.readAsText(file);
352
+ });
353
+ }
354
+
355
+ downloadSingleSession(sessionId) {
356
+ try {
357
+ const success = storageManager.exportSingleSession(sessionId);
358
+ if (success) {
359
+ domManager.updateStatus('Session downloaded successfully', 'success');
360
+ Logger.debug('Session', `Single session ${sessionId} exported by user`);
361
+ } else {
362
+ domManager.updateStatus('Error: Session not found', 'error');
363
+ }
364
+ } catch (error) {
365
+ Logger.error('Session', 'Error downloading session:', error);
366
+ domManager.updateStatus('Error downloading session', 'error');
367
+ }
368
+ }
369
+
370
+ // Session data management
371
+ saveCurrentSessionToStorage() {
372
+ if (!appState.currentSessionData) {
373
+ console.log('[DEBUG] No current session data to save');
374
+ return;
375
+ }
376
+
377
+ // Get current state from UI
378
+ const problemText = domManager.getElement('questionInput')?.value.trim() || '';
379
+ const imageElement = domManager.getElement('imagePreview');
380
+ const image = imageElement?.style.display !== 'none' ? imageElement.src : null;
381
+
382
+ // Update session data
383
+ appState.currentSessionData.problemText = problemText;
384
+ appState.currentSessionData.image = image;
385
+ appState.currentSessionData.title = this.generateSessionTitle(problemText);
386
+
387
+ // Always update lastUsed when saving
388
+ appState.currentSessionData.lastUsed = new Date().toISOString();
389
+
390
+ // Get current chat history (this is critical for persistence)
391
+ const chatHistory = messageManager.getCurrentChatHistory();
392
+ appState.currentSessionData.chatHistory = chatHistory;
393
+
394
+ console.log(`[DEBUG] Saving session ${appState.currentSessionData.id}:`);
395
+ console.log(`[DEBUG] - Title: ${appState.currentSessionData.title}`);
396
+ console.log(`[DEBUG] - Problem text length: ${problemText.length}`);
397
+ console.log(`[DEBUG] - Chat history messages: ${chatHistory.length}`);
398
+ if (chatHistory.length > 0) {
399
+ console.log(`[DEBUG] - Sample message: ${chatHistory[0].sender} - ${chatHistory[0].content.substring(0, 50)}...`);
400
+ }
401
+
402
+ // Save to storage
403
+ storageManager.saveSession(appState.currentSessionData.id, appState.currentSessionData);
404
+
405
+ console.log(`[DEBUG] Successfully saved session: ${appState.currentSessionData.id} with ${appState.currentSessionData.chatHistory.length} messages`);
406
+ }
407
+
408
+ generateSessionTitle(problemText) {
409
+ if (!problemText || problemText.trim() === '') {
410
+ return 'Untitled Session';
411
+ }
412
+
413
+ // Take first meaningful part of the problem text
414
+ const cleaned = problemText.trim().replace(/\s+/g, ' ');
415
+ const maxLength = 50;
416
+
417
+ if (cleaned.length <= maxLength) {
418
+ return cleaned;
419
+ }
420
+
421
+ // Try to break at word boundaries
422
+ const truncated = cleaned.substring(0, maxLength);
423
+ const lastSpace = truncated.lastIndexOf(' ');
424
+
425
+ if (lastSpace > maxLength * 0.6) {
426
+ return truncated.substring(0, lastSpace) + '...';
427
+ }
428
+
429
+ return truncated + '...';
430
+ }
431
+
432
+ createNewSession(problemText, image = null) {
433
+ const sessionId = this.generateSessionId();
434
+ const now = new Date().toISOString();
435
+
436
+ // Validate that we have meaningful content before creating a session
437
+ const hasContent = problemText && problemText.trim().length > 0;
438
+ const title = hasContent ? this.generateSessionTitle(problemText) : 'Untitled Session';
439
+
440
+ const newSession = {
441
+ id: sessionId,
442
+ title: title,
443
+ problemText: problemText || '',
444
+ image: image,
445
+ createdAt: now,
446
+ lastUsed: now,
447
+ status: 'active',
448
+ chatHistory: []
449
+ };
450
+
451
+ console.log(`[DEBUG] Created new session: ${sessionId}, title: "${title}", hasContent: ${hasContent}`);
452
+ return newSession;
453
+ }
454
+
455
+ generateSessionId() {
456
+ return 'session_' + Math.random().toString(36).substr(2, 16) + '_' + Date.now();
457
+ }
458
+
459
+ switchToSession(sessionId) {
460
+ console.log(`[DEBUG] Switching to session: ${sessionId}`);
461
+
462
+ // Critical: Handle edge case - prevent switching while solving
463
+ if (appState.isSolving) {
464
+ domManager.updateStatus('Cannot switch sessions while solving. Please stop the current task first.', 'warning');
465
+ return;
466
+ }
467
+
468
+ // Prevent multiple simultaneous switches
469
+ if (window.sessionSwitchInProgress) {
470
+ console.log('[DEBUG] Session switch already in progress, ignoring');
471
+ return;
472
+ }
473
+ window.sessionSwitchInProgress = true;
474
+
475
+ try {
476
+ // Save current session state if we have one
477
+ if (appState.currentSessionData) {
478
+ console.log('[DEBUG] Saving current session state before switching');
479
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
480
+ // Update the current state
481
+ appState.currentSessionData.problemText = domManager.getElement('questionInput')?.value.trim() || '';
482
+ const imageElement = domManager.getElement('imagePreview');
483
+ appState.currentSessionData.image = imageElement?.style.display !== 'none' ? imageElement.src : null;
484
+ appState.currentSessionData.title = this.generateSessionTitle(appState.currentSessionData.problemText);
485
+ this.saveCurrentSessionToStorage();
486
+ }
487
+
488
+ // Load the selected session - use the same logic as refreshSessionsList for consistency
489
+ let sessions = storageManager.loadSessions();
490
+ console.log(`[DEBUG] Loaded sessions from storage:`, Object.keys(sessions));
491
+
492
+ // Create the same combined sessions that the UI uses
493
+ const allSessions = { ...sessions };
494
+ if (appState.currentSessionData && appState.currentSessionData.id) {
495
+ allSessions[appState.currentSessionData.id] = appState.currentSessionData;
496
+ console.log(`[DEBUG] Added current session to combined sessions: ${appState.currentSessionData.id}`);
497
+ }
498
+
499
+ console.log(`[DEBUG] All available sessions:`, Object.keys(allSessions));
500
+
501
+ // Debug: Show details about each available session
502
+ Object.entries(allSessions).forEach(([id, sess]) => {
503
+ console.log(`[DEBUG] Session ${id}: title="${sess.title}", status="${sess.status}"`);
504
+ });
505
+
506
+ let session = allSessions[sessionId];
507
+
508
+ if (!session) {
509
+ console.error(`[DEBUG] Session not found: ${sessionId}`);
510
+ console.error(`[DEBUG] Available sessions:`, Object.keys(allSessions));
511
+ console.error(`[DEBUG] Current session in state:`, appState.currentSessionData?.id);
512
+ domManager.updateStatus('Session not found', 'error');
513
+ return;
514
+ }
515
+
516
+ console.log(`[DEBUG] Found session: ${sessionId}, status: ${session.status}, title: ${session.title}`);
517
+
518
+ console.log(`[DEBUG] Loading session: ${sessionId} with ${session.chatHistory ? session.chatHistory.length : 0} messages`);
519
+
520
+ // Update state WITHOUT updating lastUsed to prevent reorganization on view
521
+ appState.selectedSessionId = sessionId;
522
+ appState.currentSessionData = { ...session };
523
+
524
+ // Clear ALL selections first, then set the correct one
525
+ document.querySelectorAll('.session-item').forEach(item => {
526
+ item.classList.remove('selected');
527
+ });
528
+
529
+ // Set selection on the clicked session
530
+ const targetElement = document.querySelector(`[data-session-id="${sessionId}"]`);
531
+ if (targetElement) {
532
+ console.log(`[DEBUG] Setting selected class on session: ${sessionId}`);
533
+ targetElement.classList.add('selected');
534
+ } else {
535
+ console.error(`[DEBUG] Target element not found for session: ${sessionId}`);
536
+ // Try again after a brief delay in case DOM is updating
537
+ setTimeout(() => {
538
+ const retryElement = document.querySelector(`[data-session-id="${sessionId}"]`);
539
+ if (retryElement) {
540
+ retryElement.classList.add('selected');
541
+ console.log(`[DEBUG] Successfully set selected class on retry`);
542
+ }
543
+ }, 50);
544
+ }
545
+
546
+ // Load session data into UI
547
+ const questionInput = domManager.getElement('questionInput');
548
+ if (questionInput) {
549
+ questionInput.value = session.problemText || '';
550
+ }
551
+
552
+ // Check if session is used/read-only
553
+ const isUsedSession = this.isSessionUsed(session);
554
+
555
+ if (isUsedSession) {
556
+ // Make session read-only
557
+ this.setInputsReadOnly(`This session is ${session.status || 'used'}. Start a new session to solve another problem.`);
558
+ domManager.updateStatus(`Viewing ${session.status || 'used'} session (read-only)`, 'info');
559
+ console.log(`[DEBUG] Session ${sessionId} is read-only (status: ${session.status})`);
560
+ } else {
561
+ // Enable editing for fresh sessions
562
+ this.clearAndEnableInputs();
563
+ console.log(`[DEBUG] Session ${sessionId} is editable (status: ${session.status})`);
564
+ }
565
+
566
+ // Load image if present
567
+ imageHandler.loadSessionImage(session.image);
568
+
569
+ // Load chat history
570
+ messageManager.loadChatHistory(session.chatHistory || []);
571
+
572
+ domManager.updateStatus(`Switched to session: ${session.title}`, 'success');
573
+
574
+ } catch (error) {
575
+ console.error('[DEBUG] Error in switchToSession:', error);
576
+ domManager.updateStatus('Error switching to session', 'error');
577
+ } finally {
578
+ // Always clear the switch lock
579
+ setTimeout(() => {
580
+ window.sessionSwitchInProgress = false;
581
+ }, 100);
582
+ }
583
+ }
584
+
585
+ deleteSession(sessionId, event) {
586
+ if (event) {
587
+ event.stopPropagation();
588
+ }
589
+
590
+ console.log(`[DEBUG] Attempting to delete session: ${sessionId}`);
591
+
592
+ if (confirm('Are you sure you want to delete this session?')) {
593
+ try {
594
+ // Load sessions from storage
595
+ const sessions = storageManager.loadSessions();
596
+ console.log(`[DEBUG] Loaded ${Object.keys(sessions).length} sessions from storage`);
597
+
598
+ // Delete from storage
599
+ const sessionExistsInStorage = sessions.hasOwnProperty(sessionId);
600
+ if (sessionExistsInStorage) {
601
+ delete sessions[sessionId];
602
+ storageManager.saveSessions(sessions);
603
+ console.log(`[DEBUG] Deleted session ${sessionId} from storage`);
604
+ } else {
605
+ console.log(`[DEBUG] Session ${sessionId} not found in storage`);
606
+ }
607
+
608
+ // If this is the current session in memory, clear it
609
+ if (appState.currentSessionData && appState.currentSessionData.id === sessionId) {
610
+ console.log(`[DEBUG] Deleting current session from memory: ${sessionId}`);
611
+ appState.currentSessionData = null;
612
+ appState.selectedSessionId = null;
613
+
614
+ // Clear inputs and UI
615
+ domManager.clearInputs();
616
+ imageHandler.clearImage();
617
+ messageManager.clearChatAndRestoreWelcome();
618
+ this.clearAndEnableInputs();
619
+
620
+ // Clear any final solution artifacts panels
621
+ document.querySelectorAll('.final-artifacts-compact').forEach(panel => {
622
+ panel.remove();
623
+ });
624
+ }
625
+
626
+ // If this was the selected session, clear selection
627
+ if (appState.selectedSessionId === sessionId) {
628
+ console.log(`[DEBUG] Clearing selected session: ${sessionId}`);
629
+ appState.selectedSessionId = null;
630
+ }
631
+
632
+ // Force remove the DOM element immediately to provide instant feedback
633
+ const sessionElement = document.querySelector(`[data-session-id="${sessionId}"]`);
634
+ if (sessionElement) {
635
+ sessionElement.remove();
636
+ console.log(`[DEBUG] Removed DOM element for session: ${sessionId}`);
637
+ }
638
+
639
+ // Refresh the sessions list
640
+ this.refreshSessionsList();
641
+
642
+ domManager.updateStatus('Session deleted successfully', 'success');
643
+ console.log(`[DEBUG] Session deletion completed: ${sessionId}`);
644
+
645
+ } catch (error) {
646
+ console.error(`[DEBUG] Error deleting session ${sessionId}:`, error);
647
+ domManager.updateStatus('Error deleting session', 'error');
648
+ }
649
+ }
650
+ }
651
+
652
+ refreshSessionsList() {
653
+ console.log('[DEBUG] Updating sessions list');
654
+
655
+ const sessionsList = domManager.getElement('sessionsList');
656
+
657
+ if (!sessionsList) {
658
+ console.error('[DEBUG] Sessions list element not found');
659
+ return;
660
+ }
661
+
662
+ try {
663
+ // Ensure current session is saved to storage before refreshing list
664
+ if (appState.currentSessionData && appState.currentSessionData.id) {
665
+ console.log('[DEBUG] Ensuring current session is saved before refresh');
666
+ this.saveCurrentSessionToStorage();
667
+ }
668
+
669
+ const storedSessions = storageManager.loadSessions();
670
+ console.log(`[DEBUG] Loaded ${Object.keys(storedSessions).length} sessions from storage`);
671
+
672
+ // Automatically clean up ghost sessions from storage
673
+ this.cleanupGhostSessionsFromStorage(storedSessions);
674
+
675
+ // Combine stored sessions with current session if it exists
676
+ const allSessions = { ...storedSessions };
677
+ if (appState.currentSessionData && appState.currentSessionData.id) {
678
+ // Always include current session in the list, overriding stored version
679
+ allSessions[appState.currentSessionData.id] = appState.currentSessionData;
680
+ console.log(`[DEBUG] Including current session in list: ${appState.currentSessionData.id}`);
681
+ }
682
+
683
+ // Convert sessions object to array and sort by creation time (newest first)
684
+ const sessionsArray = Object.values(allSessions).filter(session => {
685
+ // Filter out invalid sessions and ghost sessions
686
+ if (!session || !session.id) {
687
+ console.log('[DEBUG] Filtering out session without ID:', session);
688
+ return false;
689
+ }
690
+
691
+ // Filter out ghost sessions (much more aggressive filtering)
692
+ const isGhostSession = (
693
+ (!session.title || session.title === 'Untitled Session' || session.title.trim() === '') &&
694
+ (!session.chatHistory || session.chatHistory.length === 0) &&
695
+ (!session.problemText || session.problemText.trim() === '') &&
696
+ (!session.image || session.image === null)
697
+ );
698
+
699
+ // Also filter out sessions with "solving" status but no actual content and are old
700
+ const isStuckSolvingSession = (
701
+ session.status === 'solving' &&
702
+ (!session.chatHistory || session.chatHistory.length === 0) &&
703
+ (!session.problemText || session.problemText.trim() === '') &&
704
+ Date.now() - new Date(session.createdAt || 0).getTime() > 60000 // 1 minute old
705
+ );
706
+
707
+ if (isGhostSession) {
708
+ console.log('[DEBUG] Filtering out ghost session:', session.id, session.title);
709
+ return false;
710
+ }
711
+
712
+ if (isStuckSolvingSession) {
713
+ console.log('[DEBUG] Filtering out stuck solving session:', session.id, session.title);
714
+ return false;
715
+ }
716
+
717
+ return true;
718
+ }).sort((a, b) => {
719
+ // Primary sort: creation time (newest first)
720
+ const createdA = new Date(a.createdAt || 0);
721
+ const createdB = new Date(b.createdAt || 0);
722
+
723
+ if (createdB - createdA !== 0) {
724
+ return createdB - createdA;
725
+ }
726
+
727
+ // Secondary sort (tie-breaker): lastUsed (newest first)
728
+ const usedA = new Date(a.lastUsed || 0);
729
+ const usedB = new Date(b.lastUsed || 0);
730
+ return usedB - usedA;
731
+ });
732
+
733
+ console.log(`[DEBUG] Filtered and sorted ${sessionsArray.length} sessions`);
734
+
735
+ // Track which session elements need to be created
736
+ const sessionElementsToAdd = [];
737
+
738
+ // Update existing elements and identify new ones
739
+ sessionsArray.forEach(session => {
740
+ const existingElement = sessionsList.querySelector(`[data-session-id="${session.id}"]`);
741
+
742
+ if (existingElement) {
743
+ // Update existing element in place
744
+ this.updateSessionElement(existingElement, session);
745
+ } else {
746
+ // Create new element
747
+ const sessionElement = this.createSessionElement(session);
748
+ if (sessionElement) {
749
+ sessionElementsToAdd.push(sessionElement);
750
+ }
751
+ }
752
+ });
753
+
754
+ // Add new elements in sorted order
755
+ sessionElementsToAdd.forEach(element => {
756
+ sessionsList.appendChild(element);
757
+ });
758
+
759
+ // Reorder elements according to sort order
760
+ const orderedElements = [];
761
+ sessionsArray.forEach(session => {
762
+ const element = sessionsList.querySelector(`[data-session-id="${session.id}"]`);
763
+ if (element) {
764
+ orderedElements.push(element);
765
+ }
766
+ });
767
+
768
+ // Remove orphaned DOM elements (sessions that no longer exist in data)
769
+ const existingElements = sessionsList.querySelectorAll('.session-item');
770
+ const validSessionIds = new Set(sessionsArray.map(s => s.id));
771
+
772
+ existingElements.forEach(element => {
773
+ const elementSessionId = element.getAttribute('data-session-id');
774
+ if (!validSessionIds.has(elementSessionId)) {
775
+ console.log(`[DEBUG] Removing orphaned session element: ${elementSessionId}`);
776
+ element.remove();
777
+ }
778
+ });
779
+
780
+ // Reorder DOM elements
781
+ orderedElements.forEach(element => {
782
+ sessionsList.appendChild(element);
783
+ });
784
+
785
+ // Update selection after reordering
786
+ if (appState.selectedSessionId && appState.currentSessionData) {
787
+ // Clear all selections first
788
+ document.querySelectorAll('.session-item').forEach(item => {
789
+ item.classList.remove('selected');
790
+ });
791
+
792
+ // Set selection on the currently selected session
793
+ const selectedElement = sessionsList.querySelector(`[data-session-id="${appState.selectedSessionId}"]`);
794
+ if (selectedElement) {
795
+ selectedElement.classList.add('selected');
796
+ console.log(`[DEBUG] Set selection on session: ${appState.selectedSessionId}`);
797
+ }
798
+ }
799
+
800
+ // Update session count in header
801
+ const totalSessions = sessionsArray.length;
802
+ console.log(`[DEBUG] Total sessions for header: ${totalSessions}`);
803
+ this.updateSessionsHeader(totalSessions);
804
+
805
+ // Replace feather icons for newly added session elements only
806
+ try {
807
+ sessionElementsToAdd.forEach(element => {
808
+ if (typeof feather !== 'undefined') {
809
+ feather.replace(element);
810
+ }
811
+ });
812
+ } catch (e) {
813
+ console.warn('[DEBUG] Could not replace feather icons in new session elements:', e);
814
+ }
815
+
816
+ // Final cleanup: ensure no stuck spinner sessions remain in the UI
817
+ this.removeStuckSpinnerElements();
818
+
819
+ } catch (error) {
820
+ console.error('[DEBUG] Error in refreshSessionsList:', error);
821
+ }
822
+ }
823
+
824
+ // Remove any UI elements that still have spinners but shouldn't
825
+ removeStuckSpinnerElements() {
826
+ const sessionsList = domManager.getElement('sessionsList');
827
+ if (!sessionsList) return;
828
+
829
+ const sessionElements = sessionsList.querySelectorAll('.session-item');
830
+ sessionElements.forEach(element => {
831
+ const sessionId = element.getAttribute('data-session-id');
832
+ const icon = element.querySelector('[data-feather="loader"]');
833
+
834
+ // If element has a spinner icon but no corresponding valid session data, remove it
835
+ if (icon && sessionId) {
836
+ const sessions = storageManager.loadSessions();
837
+ const allSessions = { ...sessions };
838
+ if (appState.currentSessionData && appState.currentSessionData.id) {
839
+ allSessions[appState.currentSessionData.id] = appState.currentSessionData;
840
+ }
841
+
842
+ const session = allSessions[sessionId];
843
+ if (!session ||
844
+ (!session.problemText && !session.chatHistory?.length && session.status !== 'solving')) {
845
+ console.log('[DEBUG] Removing stuck spinner element:', sessionId);
846
+ element.remove();
847
+ }
848
+ }
849
+ });
850
+ }
851
+
852
+ updateSessionElement(element, session) {
853
+ if (!element || !session) return;
854
+
855
+ // Update status-based styling
856
+ element.className = 'session-item'; // Reset classes
857
+ if (session.status === 'completed') {
858
+ element.classList.add('completed-session');
859
+ } else if (session.status === 'interrupted') {
860
+ element.classList.add('interrupted-session');
861
+ } else if (session.status === 'solving') {
862
+ element.classList.add('solving-session');
863
+ }
864
+
865
+ // Determine icon based on status
866
+ let iconName = 'file-text';
867
+ if (session.status === 'completed') iconName = 'check-circle';
868
+ else if (session.status === 'interrupted') iconName = 'x-circle';
869
+ else if (session.status === 'solving') iconName = 'loader';
870
+
871
+ // Handle date safely
872
+ let timeAgo = 'Unknown time';
873
+ try {
874
+ const displayDate = new Date(session.lastUsed || session.createdAt);
875
+ timeAgo = this.getTimeAgo(displayDate);
876
+ } catch (e) {
877
+ console.warn('[DEBUG] Invalid date for session:', session.id, session.lastUsed, session.createdAt);
878
+ }
879
+
880
+ // Handle message count safely
881
+ const messageCount = session.chatHistory ? session.chatHistory.length : 0;
882
+ const messageText = messageCount === 1 ? 'message' : 'messages';
883
+
884
+ // Handle title safely
885
+ const title = session.title || 'Untitled Session';
886
+ const safeTitle = this.escapeHtml(title);
887
+
888
+ // Update icon - force complete refresh for reliability
889
+ const iconContainer = element.querySelector('.session-icon');
890
+ if (iconContainer) {
891
+ const currentIcon = iconContainer.querySelector('i, svg');
892
+ const currentIconName = currentIcon ? currentIcon.getAttribute('data-feather') : 'unknown';
893
+ console.log(`[DEBUG] Updating session ${session.id} icon from ${currentIconName} to ${iconName} (status: ${session.status})`);
894
+
895
+ // Always force refresh the icon to ensure proper updating
896
+ iconContainer.innerHTML = `<i data-feather="${iconName}" style="width: 16px; height: 16px;"></i>`;
897
+ console.log(`[DEBUG] Force replaced icon container for session ${session.id}`);
898
+ }
899
+
900
+ // Update title and meta
901
+ const titleElement = element.querySelector('.session-title');
902
+ const metaElement = element.querySelector('.session-meta');
903
+ if (titleElement) titleElement.textContent = title;
904
+ if (metaElement) metaElement.textContent = `${timeAgo} • ${messageCount} ${messageText}`;
905
+
906
+ // Update status class
907
+ const statusElement = element.querySelector('.session-status');
908
+ if (statusElement) {
909
+ statusElement.className = `session-status ${session.status || 'active'}`;
910
+ }
911
+
912
+ // Replace feather icons for this element only with a small delay to ensure DOM update
913
+ setTimeout(() => {
914
+ try {
915
+ if (typeof feather !== 'undefined') {
916
+ feather.replace(element);
917
+ }
918
+ console.log(`[DEBUG] Feather icons replaced for session ${session.id} with status ${session.status} -> ${iconName}`);
919
+ } catch (e) {
920
+ console.warn('[DEBUG] Could not replace feather icons in updated element:', e);
921
+ }
922
+ }, 10);
923
+ }
924
+
925
+ createSessionElement(session) {
926
+ if (!session || !session.id) {
927
+ console.error('[DEBUG] Invalid session data:', session);
928
+ return null;
929
+ }
930
+
931
+ const sessionItem = document.createElement('div');
932
+ sessionItem.className = 'session-item';
933
+ sessionItem.setAttribute('data-session-id', session.id);
934
+
935
+ // Add status-based styling
936
+ if (session.status === 'completed') {
937
+ sessionItem.classList.add('completed-session');
938
+ } else if (session.status === 'interrupted') {
939
+ sessionItem.classList.add('interrupted-session');
940
+ } else if (session.status === 'solving') {
941
+ sessionItem.classList.add('solving-session');
942
+ }
943
+
944
+ // Determine icon based on status
945
+ let iconName = 'file-text';
946
+ if (session.status === 'completed') iconName = 'check-circle';
947
+ else if (session.status === 'interrupted') iconName = 'x-circle';
948
+ else if (session.status === 'solving') iconName = 'loader';
949
+
950
+ // Handle date safely
951
+ let timeAgo = 'Unknown time';
952
+ try {
953
+ const displayDate = new Date(session.lastUsed || session.createdAt);
954
+ timeAgo = this.getTimeAgo(displayDate);
955
+ } catch (e) {
956
+ console.warn('[DEBUG] Invalid date for session:', session.id, session.lastUsed, session.createdAt);
957
+ }
958
+
959
+ // Handle message count safely
960
+ const messageCount = session.chatHistory ? session.chatHistory.length : 0;
961
+ const messageText = messageCount === 1 ? 'message' : 'messages';
962
+
963
+ // Handle title safely
964
+ const title = session.title || 'Untitled Session';
965
+ const safeTitle = this.escapeHtml(title);
966
+
967
+ sessionItem.innerHTML = `
968
+ <div class="session-icon">
969
+ <i data-feather="${iconName}" style="width: 16px; height: 16px;"></i>
970
+ </div>
971
+ <div class="session-info">
972
+ <div class="session-title">${safeTitle}</div>
973
+ <div class="session-meta">${timeAgo} • ${messageCount} ${messageText}</div>
974
+ </div>
975
+ <div class="session-status ${session.status || 'active'}">
976
+ <span class="status-dot"></span>
977
+ </div>
978
+ <div class="session-actions">
979
+ <button class="session-download" title="Download this session">
980
+ <i data-feather="download" style="width: 12px; height: 12px;"></i>
981
+ </button>
982
+ <button class="session-delete" title="Delete session">
983
+ <i data-feather="x" style="width: 12px; height: 12px;"></i>
984
+ </button>
985
+ </div>
986
+ `;
987
+
988
+ // Add click handler for session switching
989
+ sessionItem.addEventListener('click', (e) => {
990
+ try {
991
+ console.log(`[DEBUG] Session item clicked: ${session.id}`, session.title);
992
+
993
+ if (!e.target.closest('.session-delete') && !e.target.closest('.session-download')) {
994
+ // Prevent multiple rapid clicks
995
+ if (sessionItem.dataset.switching === 'true') {
996
+ console.log('[DEBUG] Session switch already in progress, ignoring click');
997
+ return;
998
+ }
999
+
1000
+ sessionItem.dataset.switching = 'true';
1001
+
1002
+ setTimeout(() => {
1003
+ this.switchToSession(session.id);
1004
+ sessionItem.dataset.switching = 'false';
1005
+ }, 10);
1006
+ } else {
1007
+ console.log(`[DEBUG] Action button clicked, not switching session`);
1008
+ }
1009
+ } catch (error) {
1010
+ console.error('[DEBUG] Error in session click handler:', error);
1011
+ sessionItem.dataset.switching = 'false';
1012
+ }
1013
+ });
1014
+
1015
+ // Add click handler for download button
1016
+ const downloadButton = sessionItem.querySelector('.session-download');
1017
+ downloadButton?.addEventListener('click', (e) => {
1018
+ e.stopPropagation();
1019
+ this.downloadSingleSession(session.id);
1020
+ });
1021
+
1022
+ // Add click handler for delete button
1023
+ const deleteButton = sessionItem.querySelector('.session-delete');
1024
+ deleteButton?.addEventListener('click', (e) => {
1025
+ this.deleteSession(session.id, e);
1026
+ });
1027
+
1028
+ return sessionItem;
1029
+ }
1030
+
1031
+ getTimeAgo(date) {
1032
+ if (!date) return 'Unknown time';
1033
+
1034
+ let dateObj;
1035
+ try {
1036
+ dateObj = new Date(date);
1037
+ if (isNaN(dateObj.getTime())) {
1038
+ return 'Invalid date';
1039
+ }
1040
+ } catch (e) {
1041
+ return 'Invalid date';
1042
+ }
1043
+
1044
+ const now = new Date();
1045
+ const diffMs = now - dateObj;
1046
+
1047
+ // Handle future dates
1048
+ if (diffMs < 0) {
1049
+ return 'Just now';
1050
+ }
1051
+
1052
+ const diffSecs = Math.floor(diffMs / 1000);
1053
+ const diffMins = Math.floor(diffMs / 60000);
1054
+ const diffHours = Math.floor(diffMs / 3600000);
1055
+ const diffDays = Math.floor(diffMs / 86400000);
1056
+ const diffWeeks = Math.floor(diffMs / (86400000 * 7));
1057
+ const diffMonths = Math.floor(diffMs / (86400000 * 30));
1058
+
1059
+ if (diffSecs < 30) return 'Just now';
1060
+ if (diffSecs < 60) return `${diffSecs}s ago`;
1061
+ if (diffMins < 60) return `${diffMins}m ago`;
1062
+ if (diffHours < 24) return `${diffHours}h ago`;
1063
+ if (diffDays < 7) return `${diffDays}d ago`;
1064
+ if (diffWeeks < 4) return `${diffWeeks}w ago`;
1065
+ if (diffMonths < 12) return `${diffMonths}mo ago`;
1066
+
1067
+ // For very old dates, show the actual date
1068
+ return dateObj.toLocaleDateString();
1069
+ }
1070
+
1071
+ updateSessionsHeader(totalSessions) {
1072
+ const header = document.querySelector('.sessions-header .form-label');
1073
+ if (!header) return;
1074
+
1075
+ const baseText = 'Session History';
1076
+ const sessionCount = Math.max(0, totalSessions || 0);
1077
+
1078
+ if (sessionCount === 0) {
1079
+ header.innerHTML = `
1080
+ <i data-feather="clock" style="width: 16px; height: 16px; margin-right: 8px;"></i>
1081
+ ${baseText}
1082
+ `;
1083
+ } else if (sessionCount === 1) {
1084
+ header.innerHTML = `
1085
+ <i data-feather="clock" style="width: 16px; height: 16px; margin-right: 8px;"></i>
1086
+ ${baseText} (1 session)
1087
+ `;
1088
+ } else {
1089
+ header.innerHTML = `
1090
+ <i data-feather="clock" style="width: 16px; height: 16px; margin-right: 8px;"></i>
1091
+ ${baseText} (${sessionCount} sessions)
1092
+ `;
1093
+ }
1094
+
1095
+ // Ensure feather icons are replaced
1096
+ try {
1097
+ // Only replace icons in the header area
1098
+ const headerElement = document.querySelector('.sessions-header');
1099
+ if (headerElement && typeof feather !== 'undefined') {
1100
+ feather.replace(headerElement);
1101
+ }
1102
+ } catch (e) {
1103
+ console.warn('[DEBUG] Could not replace feather icons:', e);
1104
+ }
1105
+ }
1106
+
1107
+ updateCurrentSessionDisplay() {
1108
+ // Since we've removed the separate currentSession element,
1109
+ // the session display is now handled by refreshSessionsList()
1110
+ // We can trigger a refresh of the sessions list to ensure the current session appears correctly
1111
+ this.refreshSessionsList();
1112
+ }
1113
+
1114
+ escapeHtml(text) {
1115
+ const div = document.createElement('div');
1116
+ div.textContent = text;
1117
+ return div.innerHTML;
1118
+ }
1119
+
1120
+ // Handle session-related socket events
1121
+ handleSessionConnected(data) {
1122
+ console.log('[DEBUG] SessionManager.handleSessionConnected called with data:', JSON.stringify(data));
1123
+ Logger.debug('Session', 'Session connected:', data);
1124
+ appState.currentSessionId = data.session_id;
1125
+ console.log('[DEBUG] Set appState.currentSessionId to:', data.session_id);
1126
+
1127
+ const sessionInfoText = `Session: ${data.session_id.substring(0, 8)}`;
1128
+ console.log('[DEBUG] About to update session info to:', sessionInfoText);
1129
+ domManager.updateSessionInfo(sessionInfoText);
1130
+
1131
+ console.log('[DEBUG] About to update status to: Connected - Ready to solve problems');
1132
+ domManager.updateStatus('Connected - Ready to solve problems', 'success');
1133
+ }
1134
+
1135
+ // Session creation for solving
1136
+ handleSolveProblem(problemText, imageData) {
1137
+ // Auto-create session if none exists or we're viewing a stored session
1138
+ if (!appState.currentSessionData || appState.selectedSessionId !== null) {
1139
+ // Create new session
1140
+ console.log(`[DEBUG] Creating new session (previous status: ${appState.currentSessionData?.status || 'none'})`);
1141
+ appState.currentSessionData = this.createNewSession(problemText, imageData);
1142
+ appState.selectedSessionId = null; // Set to null for current/new session
1143
+
1144
+ // Clear visual selection from stored sessions and update display
1145
+ document.querySelectorAll('.session-item').forEach(item => {
1146
+ item.classList.remove('selected');
1147
+ });
1148
+
1149
+ // Immediately save new session to storage
1150
+ this.saveCurrentSessionToStorage();
1151
+ console.log(`[DEBUG] New session created and saved with ID: ${appState.currentSessionData.id}`);
1152
+ // Ensure UI immediately reflects the newly-created session
1153
+ this.refreshSessionsList();
1154
+ } else {
1155
+ // Update existing session
1156
+ appState.currentSessionData.problemText = problemText;
1157
+ appState.currentSessionData.image = imageData;
1158
+ appState.currentSessionData.title = this.generateSessionTitle(problemText);
1159
+ // Save updated session
1160
+ this.saveCurrentSessionToStorage();
1161
+ console.log(`[DEBUG] Updated and saved existing session: ${appState.currentSessionData.id}`);
1162
+ // Update sessions list to reflect any changes to the current session
1163
+ this.refreshSessionsList();
1164
+ }
1165
+
1166
+ return appState.currentSessionData.id;
1167
+ }
1168
+
1169
+ // Handle solving state changes
1170
+ handleSolvingStarted() {
1171
+ // Save current session data including chat history BEFORE starting to solve
1172
+ if (appState.currentSessionData) {
1173
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1174
+ appState.currentSessionData.status = 'solving';
1175
+ // Update lastUsed when solving starts
1176
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1177
+ this.saveCurrentSessionToStorage();
1178
+ this.updateCurrentSessionDisplay();
1179
+ this.refreshSessionsList();
1180
+
1181
+ // Make inputs read-only once solving starts
1182
+ this.setInputsReadOnly('Cannot modify problem while solving is in progress');
1183
+
1184
+ // Add visual indicator to the current session in the unified list
1185
+ if (appState.selectedSessionId && appState.currentSessionData.id === appState.selectedSessionId) {
1186
+ const sessionElement = document.querySelector(`[data-session-id="${appState.selectedSessionId}"]`);
1187
+ if (sessionElement) {
1188
+ sessionElement.classList.add('active-solving');
1189
+ }
1190
+ }
1191
+
1192
+ // Start periodic saving during solving
1193
+ this.startPeriodicSaving();
1194
+ }
1195
+ }
1196
+
1197
+ handleSolvingComplete() {
1198
+ console.log('[DEBUG] Handling solving completed - cleaning up UI and saving session');
1199
+
1200
+ // Stop periodic saving
1201
+ this.stopPeriodicSaving();
1202
+
1203
+ // Clean up any remaining UI indicators
1204
+ messageManager.cleanupAllActiveIndicators();
1205
+
1206
+ // Update session status
1207
+ if (appState.currentSessionData) {
1208
+ appState.currentSessionData.status = 'completed';
1209
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1210
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1211
+ this.saveCurrentSessionToStorage();
1212
+ this.refreshSessionsList();
1213
+
1214
+ // Keep inputs read-only for completed sessions
1215
+ this.setInputsReadOnly('This session is completed. Start a new session to solve another problem.');
1216
+
1217
+ console.log(`[DEBUG] Session ${appState.currentSessionData.id} marked as completed and saved with ${appState.currentSessionData.chatHistory.length} messages`);
1218
+ }
1219
+ }
1220
+
1221
+ handleSolvingInterrupted() {
1222
+ console.log('[DEBUG] Handling solving interrupted - cleaning up UI and saving session');
1223
+
1224
+ // Stop periodic saving
1225
+ this.stopPeriodicSaving();
1226
+
1227
+ // CRITICAL: Clean up all UI indicators first
1228
+ messageManager.cleanupAllActiveIndicators();
1229
+
1230
+ // Update session status
1231
+ if (appState.currentSessionData) {
1232
+ appState.currentSessionData.status = 'interrupted';
1233
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1234
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1235
+ this.saveCurrentSessionToStorage();
1236
+ this.refreshSessionsList();
1237
+
1238
+ // Keep inputs read-only for interrupted sessions
1239
+ this.setInputsReadOnly('This session was interrupted. Start a new session to solve another problem.');
1240
+
1241
+ console.log(`[DEBUG] Session ${appState.currentSessionData.id} marked as interrupted and saved with ${appState.currentSessionData.chatHistory.length} messages`);
1242
+ }
1243
+ }
1244
+
1245
+ // General handler for any session failure or error
1246
+ handleSolvingError() {
1247
+ console.log('[DEBUG] Handling solving error - cleaning up UI and saving session');
1248
+
1249
+ // Stop periodic saving
1250
+ this.stopPeriodicSaving();
1251
+
1252
+ // Clean up all UI indicators
1253
+ messageManager.cleanupAllActiveIndicators();
1254
+
1255
+ // Update session status
1256
+ if (appState.currentSessionData) {
1257
+ appState.currentSessionData.status = 'interrupted';
1258
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1259
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1260
+ this.saveCurrentSessionToStorage();
1261
+ this.refreshSessionsList();
1262
+
1263
+ // Keep inputs read-only for error sessions
1264
+ this.setInputsReadOnly('This session encountered an error. Start a new session to solve another problem.');
1265
+
1266
+ console.log(`[DEBUG] Session ${appState.currentSessionData.id} marked as interrupted due to error and saved with ${appState.currentSessionData.chatHistory.length} messages`);
1267
+ }
1268
+ }
1269
+
1270
+ // Emergency cleanup method - can be called from anywhere when things go wrong
1271
+ emergencyCleanupAndSave() {
1272
+ console.log('[DEBUG] Emergency cleanup and save triggered');
1273
+
1274
+ try {
1275
+ // Clean up all UI indicators
1276
+ messageManager.cleanupAllActiveIndicators();
1277
+
1278
+ // Save whatever we have in the current session
1279
+ if (appState.currentSessionData) {
1280
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1281
+ appState.currentSessionData.status = 'interrupted';
1282
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1283
+ this.saveCurrentSessionToStorage();
1284
+ this.refreshSessionsList();
1285
+
1286
+ console.log(`[DEBUG] Emergency save completed for session ${appState.currentSessionData.id} with ${appState.currentSessionData.chatHistory.length} messages`);
1287
+ }
1288
+
1289
+ // Reset solving state
1290
+ appState.isSolving = false;
1291
+
1292
+ // Re-enable inputs
1293
+ this.clearAndEnableInputs();
1294
+
1295
+ } catch (error) {
1296
+ console.error('[DEBUG] Error during emergency cleanup:', error);
1297
+ }
1298
+ }
1299
+
1300
+ // Periodic saving mechanism
1301
+ startPeriodicSaving() {
1302
+ // Clear any existing interval
1303
+ this.stopPeriodicSaving();
1304
+
1305
+ // Save every 10 seconds during solving to ensure we don't lose messages
1306
+ this.periodicSaveInterval = setInterval(() => {
1307
+ if (appState.currentSessionData && appState.isSolving) {
1308
+ console.log('[DEBUG] Periodic save triggered during solving');
1309
+ appState.currentSessionData.chatHistory = messageManager.getCurrentChatHistory();
1310
+ appState.currentSessionData.lastUsed = new Date().toISOString();
1311
+ this.saveCurrentSessionToStorage();
1312
+ } else {
1313
+ // Stop saving if we're no longer solving
1314
+ this.stopPeriodicSaving();
1315
+ }
1316
+ }, 10000); // 10 seconds
1317
+
1318
+ console.log('[DEBUG] Started periodic saving during solving');
1319
+ }
1320
+
1321
+ stopPeriodicSaving() {
1322
+ if (this.periodicSaveInterval) {
1323
+ clearInterval(this.periodicSaveInterval);
1324
+ this.periodicSaveInterval = null;
1325
+ console.log('[DEBUG] Stopped periodic saving');
1326
+ }
1327
+ }
1328
+
1329
+ // Automatically cleanup ghost sessions from storage during refresh
1330
+ cleanupGhostSessionsFromStorage(sessions) {
1331
+ let deletedCount = 0;
1332
+ const sessionIds = Object.keys(sessions);
1333
+
1334
+ sessionIds.forEach(sessionId => {
1335
+ const session = sessions[sessionId];
1336
+
1337
+ // Same aggressive filtering logic
1338
+ const isGhostSession = (
1339
+ (!session.title || session.title === 'Untitled Session' || session.title.trim() === '') &&
1340
+ (!session.chatHistory || session.chatHistory.length === 0) &&
1341
+ (!session.problemText || session.problemText.trim() === '') &&
1342
+ (!session.image || session.image === null)
1343
+ );
1344
+
1345
+ const isStuckSolvingSession = (
1346
+ session.status === 'solving' &&
1347
+ (!session.chatHistory || session.chatHistory.length === 0) &&
1348
+ (!session.problemText || session.problemText.trim() === '') &&
1349
+ Date.now() - new Date(session.createdAt || 0).getTime() > 60000 // 1 minute old
1350
+ );
1351
+
1352
+ if (isGhostSession || isStuckSolvingSession) {
1353
+ console.log(`[DEBUG] Auto-removing ghost session from storage: ${sessionId}`);
1354
+ delete sessions[sessionId];
1355
+ deletedCount++;
1356
+ }
1357
+ });
1358
+
1359
+ if (deletedCount > 0) {
1360
+ storageManager.saveSessions(sessions);
1361
+ console.log(`[DEBUG] Auto-cleaned ${deletedCount} ghost sessions from storage`);
1362
+ }
1363
+ }
1364
+
1365
+ // Cleanup ghost sessions from storage
1366
+ cleanupGhostSessions() {
1367
+ console.log('[DEBUG] Starting ghost session cleanup');
1368
+
1369
+ try {
1370
+ const sessions = storageManager.loadSessions();
1371
+ const sessionIds = Object.keys(sessions);
1372
+ let deletedCount = 0;
1373
+
1374
+ sessionIds.forEach(sessionId => {
1375
+ const session = sessions[sessionId];
1376
+
1377
+ // Identify ghost sessions
1378
+ const isGhostSession = (
1379
+ (!session.title || session.title === 'Untitled Session') &&
1380
+ (!session.chatHistory || session.chatHistory.length === 0) &&
1381
+ (!session.problemText || session.problemText.trim() === '') &&
1382
+ session.status !== 'solving' // Don't delete actual solving sessions
1383
+ );
1384
+
1385
+ if (isGhostSession) {
1386
+ // Check if it's old (more than 1 hour old)
1387
+ const sessionAge = Date.now() - new Date(session.createdAt || 0).getTime();
1388
+ const oneHour = 60 * 60 * 1000;
1389
+
1390
+ if (sessionAge > oneHour) {
1391
+ console.log(`[DEBUG] Cleaning up ghost session: ${sessionId}`);
1392
+ delete sessions[sessionId];
1393
+ deletedCount++;
1394
+ }
1395
+ }
1396
+ });
1397
+
1398
+ if (deletedCount > 0) {
1399
+ storageManager.saveSessions(sessions);
1400
+ console.log(`[DEBUG] Cleaned up ${deletedCount} ghost sessions`);
1401
+ } else {
1402
+ console.log('[DEBUG] No ghost sessions to clean up');
1403
+ }
1404
+
1405
+ } catch (error) {
1406
+ console.error('[DEBUG] Error during ghost session cleanup:', error);
1407
+ }
1408
+ }
1409
+
1410
+ // Enhanced clear all sessions with ghost cleanup
1411
+ clearAllSessionsEnhanced() {
1412
+ if (confirm('Are you sure you want to clear all session history? This cannot be undone.')) {
1413
+ try {
1414
+ // Also clear current session state
1415
+ appState.selectedSessionId = null;
1416
+ appState.currentSessionData = null;
1417
+
1418
+ // Clear storage
1419
+ storageManager.clearAllSessions();
1420
+
1421
+ // Clear UI
1422
+ domManager.clearInputs();
1423
+ imageHandler.clearImage();
1424
+ messageManager.clearChatAndRestoreWelcome();
1425
+ this.clearAndEnableInputs();
1426
+
1427
+ // Clear any final solution artifacts panels
1428
+ document.querySelectorAll('.final-artifacts-compact').forEach(panel => {
1429
+ panel.remove();
1430
+ });
1431
+
1432
+ // Clear DOM elements manually
1433
+ const sessionsList = domManager.getElement('sessionsList');
1434
+ if (sessionsList) {
1435
+ sessionsList.innerHTML = '';
1436
+ }
1437
+
1438
+ this.refreshSessionsList();
1439
+ domManager.updateStatus('All sessions cleared', 'success');
1440
+ Logger.debug('Session', 'All sessions cleared by user');
1441
+
1442
+ } catch (error) {
1443
+ Logger.error('Session', 'Error clearing sessions:', error);
1444
+ domManager.updateStatus('Error clearing sessions', 'error');
1445
+ }
1446
+ }
1447
+ }
1448
+ }
1449
+
1450
+ // Create singleton instance
1451
+ export const sessionManager = new SessionManager();
src/pips/static/js/ui/settings-manager.js ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Settings Manager - Handles settings modal, API key management, and form handling
3
+ */
4
+ import { Logger } from '../core/logger.js';
5
+ import { storageManager } from '../core/storage.js';
6
+ import { socketManager } from '../network/socket.js';
7
+ import { domManager } from './dom-manager.js';
8
+
9
+ export class SettingsManager {
10
+ constructor() {
11
+ this.isInitialized = false;
12
+ }
13
+
14
+ initialize() {
15
+ if (this.isInitialized) return;
16
+
17
+ this.setupEventListeners();
18
+ this.loadApiKeysFromStorage();
19
+
20
+ // Initialize PIPS mode to default first
21
+ this.initializePIPSMode();
22
+
23
+ // Then load user settings (which may override the default)
24
+ this.loadUserSettingsFromStorage();
25
+
26
+ this.isInitialized = true;
27
+
28
+ Logger.debug('Settings', 'Settings manager initialized');
29
+ }
30
+
31
+ setupEventListeners() {
32
+ // Settings modal listeners
33
+ domManager.getElement('settingsBtn')?.addEventListener('click', () => this.openSettings());
34
+ domManager.getElement('closeBtn')?.addEventListener('click', () => this.closeSettings());
35
+ domManager.getElement('settingsForm')?.addEventListener('submit', (e) => this.saveSettings(e));
36
+
37
+ // PIPS Mode iOS switch listener
38
+ domManager.getElement('pipsModeSwitch')?.addEventListener('change', () => {
39
+ this.updateModeIndicator();
40
+ this.autoSaveSettings();
41
+ });
42
+
43
+ // Auto-save on model selection changes
44
+ domManager.getElement('generatorModelSelect')?.addEventListener('change', () => this.autoSaveSettings());
45
+ domManager.getElement('criticModelSelect')?.addEventListener('change', () => this.autoSaveSettings());
46
+
47
+ // Auto-save on other setting changes
48
+ domManager.getElement('maxIterations')?.addEventListener('change', () => this.autoSaveSettings());
49
+ domManager.getElement('temperature')?.addEventListener('change', () => this.autoSaveSettings());
50
+ domManager.getElement('maxTokens')?.addEventListener('change', () => this.autoSaveSettings());
51
+ domManager.getElement('maxExecutionTime')?.addEventListener('change', () => this.autoSaveSettings());
52
+ // Custom rules handling - different behavior for global vs per-session
53
+ // Per-session rules (navbar) - don't auto-save to localStorage
54
+ domManager.getElement('customRules')?.addEventListener('input', () => {
55
+ // Per-session rules are not saved to localStorage
56
+ Logger.debug('Settings', 'Per-session custom rules updated');
57
+ });
58
+
59
+ // Global rules (settings modal) - auto-save to localStorage
60
+ domManager.getElement('customRulesSettings')?.addEventListener('input', () => {
61
+ this.autoSaveSettings();
62
+ });
63
+
64
+ // Settings tabs listeners
65
+ const tabButtons = document.querySelectorAll('.tab-button');
66
+ tabButtons.forEach(button => {
67
+ button.addEventListener('click', () => this.switchTab(button.dataset.tab));
68
+ });
69
+
70
+ // Modal click-outside-to-close
71
+ window.addEventListener('click', (event) => {
72
+ if (event.target === domManager.getElement('settingsModal')) {
73
+ this.closeSettings();
74
+ }
75
+ });
76
+
77
+ // Clear all sessions button with retry mechanism
78
+ const setupClearAllButton = () => {
79
+ const clearAllBtn = document.getElementById('clearAllSessionsBtn');
80
+ console.log('[DEBUG] Clear all sessions button:', clearAllBtn);
81
+ if (clearAllBtn) {
82
+ clearAllBtn.addEventListener('click', (e) => {
83
+ e.preventDefault();
84
+ console.log('[DEBUG] Clear all sessions button clicked');
85
+ this.clearAllSessions();
86
+ });
87
+ console.log('[DEBUG] Clear all sessions button listener added');
88
+ return true;
89
+ } else {
90
+ console.error('[DEBUG] Clear all sessions button not found');
91
+ return false;
92
+ }
93
+ };
94
+
95
+ // Try immediately
96
+ if (!setupClearAllButton()) {
97
+ // If not found, try again after a delay
98
+ setTimeout(() => {
99
+ setupClearAllButton();
100
+ }, 100);
101
+ }
102
+
103
+ // Also add a global click handler as backup
104
+ document.addEventListener('click', (e) => {
105
+ if (e.target && e.target.id === 'clearAllSessionsBtn') {
106
+ e.preventDefault();
107
+ console.log('[DEBUG] Clear all sessions button clicked via global handler');
108
+ this.clearAllSessions();
109
+ }
110
+ });
111
+
112
+ Logger.debug('Settings', 'Event listeners set up');
113
+ }
114
+
115
+ initializePIPSMode() {
116
+ const pipsModeSwitch = domManager.getElement('pipsModeSwitch');
117
+ const agentRadio = domManager.getElement('pipsModeAgent');
118
+ const interactiveRadio = domManager.getElement('pipsModeInteractive');
119
+
120
+ // Set Agent mode as default (will be overridden by loadUserSettingsFromStorage if user has saved settings)
121
+ if (pipsModeSwitch) {
122
+ pipsModeSwitch.checked = false; // Agent mode (unchecked state)
123
+ }
124
+
125
+ // Ensure radio buttons are in sync with switch
126
+ if (agentRadio && interactiveRadio && pipsModeSwitch) {
127
+ const isInteractive = pipsModeSwitch.checked;
128
+ agentRadio.checked = !isInteractive;
129
+ interactiveRadio.checked = isInteractive;
130
+ }
131
+
132
+ // Update the mode indicator
133
+ this.updateModeIndicator();
134
+
135
+ Logger.debug('Settings', 'PIPS mode initialized to default (Agent)');
136
+ }
137
+
138
+ openSettings() {
139
+ domManager.getElement('settingsModal').style.display = 'block';
140
+ Logger.debug('Settings', 'Settings modal opened');
141
+ }
142
+
143
+ closeSettings() {
144
+ domManager.getElement('settingsModal').style.display = 'none';
145
+ Logger.debug('Settings', 'Settings modal closed');
146
+ }
147
+
148
+ saveSettings(e) {
149
+ e.preventDefault();
150
+
151
+ try {
152
+ this.saveApiKeysToStorage();
153
+ // Persist non-sensitive user settings (exclude API keys and session rules) to localStorage
154
+ const { openai_api_key, google_api_key, anthropic_api_key, session_rules, ...nonSensitive } = this.getCurrentSettings();
155
+ storageManager.saveUserSettings(nonSensitive);
156
+ this.sendCurrentSettingsToServer();
157
+ Logger.debug('Settings', 'Settings saved successfully');
158
+ } catch (error) {
159
+ Logger.error('Settings', 'Error saving settings:', error);
160
+ domManager.updateStatus('Error saving settings', 'error');
161
+ }
162
+ }
163
+
164
+ // Auto-save settings to localStorage (without sending to server or showing status)
165
+ autoSaveSettings() {
166
+ try {
167
+ // Only save non-sensitive settings to localStorage
168
+ const { openai_api_key, google_api_key, anthropic_api_key, session_rules, ...nonSensitive } = this.getCurrentSettings();
169
+ // Remove session_rules from saved settings - they should not persist
170
+ storageManager.saveUserSettings(nonSensitive);
171
+ Logger.debug('Settings', 'Settings auto-saved to localStorage (excluding per-session rules)');
172
+ } catch (error) {
173
+ Logger.error('Settings', 'Error auto-saving settings:', error);
174
+ }
175
+ }
176
+
177
+ loadApiKeysFromStorage() {
178
+ try {
179
+ const apiKeys = storageManager.loadApiKeys();
180
+
181
+ if (apiKeys.openai_api_key) {
182
+ domManager.getElement('openaiApiKeyInput').value = apiKeys.openai_api_key;
183
+ }
184
+ if (apiKeys.google_api_key) {
185
+ domManager.getElement('googleApiKeyInput').value = apiKeys.google_api_key;
186
+ }
187
+ if (apiKeys.anthropic_api_key) {
188
+ domManager.getElement('anthropicApiKeyInput').value = apiKeys.anthropic_api_key;
189
+ }
190
+
191
+ Logger.debug('Settings', 'API keys loaded from storage');
192
+ } catch (error) {
193
+ Logger.error('Settings', 'Error loading API keys from storage:', error);
194
+ }
195
+ }
196
+
197
+ saveApiKeysToStorage() {
198
+ try {
199
+ const apiKeys = {
200
+ openai_api_key: domManager.getElement('openaiApiKeyInput').value.trim(),
201
+ google_api_key: domManager.getElement('googleApiKeyInput').value.trim(),
202
+ anthropic_api_key: domManager.getElement('anthropicApiKeyInput').value.trim()
203
+ };
204
+
205
+ storageManager.saveApiKeys(apiKeys);
206
+ Logger.debug('Settings', 'API keys saved to storage');
207
+ } catch (error) {
208
+ Logger.error('Settings', 'Error saving API keys to storage:', error);
209
+ }
210
+ }
211
+
212
+ sendCurrentSettingsToServer() {
213
+ try {
214
+ const pipsModeSwitch = domManager.getElement('pipsModeSwitch');
215
+ const pipsMode = pipsModeSwitch?.checked ? 'INTERACTIVE' : 'AGENT';
216
+
217
+ const settings = {
218
+ model: domManager.getElement('generatorModelSelect')?.value || 'gpt-4o-mini',
219
+ openai_api_key: domManager.getElement('openaiApiKeyInput').value.trim(),
220
+ google_api_key: domManager.getElement('googleApiKeyInput').value.trim(),
221
+ anthropic_api_key: domManager.getElement('anthropicApiKeyInput').value.trim(),
222
+ max_iterations: parseInt(domManager.getElement('maxIterations').value),
223
+ temperature: parseFloat(domManager.getElement('temperature').value),
224
+ max_tokens: parseInt(domManager.getElement('maxTokens').value),
225
+ max_execution_time: parseInt(domManager.getElement('maxExecutionTime').value),
226
+ // New PIPS interactive mode settings
227
+ pips_mode: pipsMode,
228
+ generator_model: domManager.getElement('generatorModelSelect')?.value || 'gpt-4o-mini',
229
+ critic_model: domManager.getElement('criticModelSelect')?.value || 'gpt-4o-mini',
230
+ // Send combined rules to backend and separate fields for internal tracking
231
+ custom_rules: this.getCombinedRulesForBackend(),
232
+ global_rules: domManager.getElement('customRulesSettings')?.value?.trim() || '',
233
+ session_rules: domManager.getElement('customRules')?.value?.trim() || ''
234
+ };
235
+
236
+ socketManager.send('update_settings', settings);
237
+ Logger.debug('Settings', 'Settings sent to server:', settings);
238
+ } catch (error) {
239
+ Logger.error('Settings', 'Error sending settings to server:', error);
240
+ }
241
+ }
242
+
243
+ updateModeIndicator() {
244
+ const pipsModeSwitch = domManager.getElement('pipsModeSwitch');
245
+ const modeDescription = domManager.getElement('modeDescription');
246
+ const agentRadio = domManager.getElement('pipsModeAgent');
247
+ const interactiveRadio = domManager.getElement('pipsModeInteractive');
248
+
249
+ if (pipsModeSwitch && modeDescription) {
250
+ const isInteractive = pipsModeSwitch.checked;
251
+ const selectedMode = isInteractive ? 'INTERACTIVE' : 'AGENT';
252
+
253
+ // Update description text
254
+ modeDescription.textContent = isInteractive
255
+ ? 'Collaborate with AI at each step'
256
+ : 'Automatic solving without user intervention';
257
+
258
+ // Sync with hidden radio buttons for backend compatibility
259
+ if (agentRadio && interactiveRadio) {
260
+ agentRadio.checked = !isInteractive;
261
+ interactiveRadio.checked = isInteractive;
262
+ }
263
+
264
+ Logger.debug('Settings', 'PIPS mode updated to:', selectedMode);
265
+ }
266
+ }
267
+
268
+ switchTab(tabName) {
269
+ // Remove active class from all tab buttons and content
270
+ document.querySelectorAll('.tab-button').forEach(btn => btn.classList.remove('active'));
271
+ document.querySelectorAll('.tab-content').forEach(content => content.classList.remove('active'));
272
+
273
+ // Add active class to clicked tab button and corresponding content
274
+ document.querySelector(`[data-tab="${tabName}"]`)?.classList.add('active');
275
+ document.querySelector(`#${tabName}-tab`)?.classList.add('active');
276
+
277
+ Logger.debug('Settings', 'Switched to tab:', tabName);
278
+ }
279
+
280
+ // Handle settings update response from server
281
+ handleSettingsUpdated(data) {
282
+ Logger.debug('Settings', 'Settings update response:', data);
283
+
284
+ if (data.status === 'success') {
285
+ domManager.updateStatus('Settings saved successfully!', 'success');
286
+ this.closeSettings();
287
+ } else {
288
+ domManager.updateStatus(`Settings error: ${data.message}`, 'error');
289
+ }
290
+ }
291
+
292
+ // Load saved API keys and send to server (called on app initialization)
293
+ initializeServerSettings() {
294
+ const apiKeys = storageManager.loadApiKeys();
295
+
296
+ if (apiKeys.openai_api_key || apiKeys.google_api_key) {
297
+ Logger.debug('Settings', 'Loading saved API keys and sending to server');
298
+ this.sendCurrentSettingsToServer();
299
+ domManager.updateStatus('API keys loaded from browser storage', 'success');
300
+ }
301
+ }
302
+
303
+ // Get current settings snapshot
304
+ getCurrentSettings() {
305
+ const pipsModeSwitch = domManager.getElement('pipsModeSwitch');
306
+ const pipsMode = pipsModeSwitch?.checked ? 'INTERACTIVE' : 'AGENT';
307
+
308
+ return {
309
+ model: domManager.getElement('generatorModelSelect')?.value || 'gpt-4o-mini',
310
+ openai_api_key: domManager.getElement('openaiApiKeyInput')?.value?.trim(),
311
+ google_api_key: domManager.getElement('googleApiKeyInput')?.value?.trim(),
312
+ anthropic_api_key: domManager.getElement('anthropicApiKeyInput')?.value?.trim(),
313
+ max_iterations: parseInt(domManager.getElement('maxIterations')?.value),
314
+ temperature: parseFloat(domManager.getElement('temperature')?.value),
315
+ max_tokens: parseInt(domManager.getElement('maxTokens')?.value),
316
+ max_execution_time: parseInt(domManager.getElement('maxExecutionTime')?.value),
317
+ // PIPS interactive mode settings
318
+ pips_mode: pipsMode,
319
+ generator_model: domManager.getElement('generatorModelSelect')?.value || 'gpt-4o-mini',
320
+ critic_model: domManager.getElement('criticModelSelect')?.value || 'gpt-4o-mini',
321
+ // Send combined rules to backend and separate fields for internal tracking
322
+ custom_rules: this.getCombinedRulesForBackend(),
323
+ global_rules: domManager.getElement('customRulesSettings')?.value?.trim() || '',
324
+ session_rules: domManager.getElement('customRules')?.value?.trim() || ''
325
+ };
326
+ }
327
+
328
+ // Update settings programmatically
329
+ updateSettings(settings) {
330
+ if (settings.openai_api_key && domManager.getElement('openaiApiKeyInput')) {
331
+ domManager.getElement('openaiApiKeyInput').value = settings.openai_api_key;
332
+ }
333
+ if (settings.google_api_key && domManager.getElement('googleApiKeyInput')) {
334
+ domManager.getElement('googleApiKeyInput').value = settings.google_api_key;
335
+ }
336
+ if (settings.anthropic_api_key && domManager.getElement('anthropicApiKeyInput')) {
337
+ domManager.getElement('anthropicApiKeyInput').value = settings.anthropic_api_key;
338
+ }
339
+ if (settings.max_iterations && domManager.getElement('maxIterations')) {
340
+ domManager.getElement('maxIterations').value = settings.max_iterations;
341
+ }
342
+ if (settings.temperature !== undefined && domManager.getElement('temperature')) {
343
+ domManager.getElement('temperature').value = settings.temperature;
344
+ }
345
+ if (settings.max_tokens && domManager.getElement('maxTokens')) {
346
+ domManager.getElement('maxTokens').value = settings.max_tokens;
347
+ }
348
+ if (settings.max_execution_time && domManager.getElement('maxExecutionTime')) {
349
+ domManager.getElement('maxExecutionTime').value = settings.max_execution_time;
350
+ }
351
+
352
+ // PIPS interactive mode settings
353
+ if (settings.pips_mode !== undefined) {
354
+ const pipsModeSwitch = domManager.getElement('pipsModeSwitch');
355
+ if (pipsModeSwitch) {
356
+ pipsModeSwitch.checked = settings.pips_mode === 'INTERACTIVE';
357
+ this.updateModeIndicator();
358
+ }
359
+ }
360
+
361
+ // Model settings - handle both old 'model' field and new separate fields
362
+ if (settings.model && domManager.getElement('generatorModelSelect')) {
363
+ domManager.getElement('generatorModelSelect').value = settings.model;
364
+ }
365
+ if (settings.generator_model && domManager.getElement('generatorModelSelect')) {
366
+ domManager.getElement('generatorModelSelect').value = settings.generator_model;
367
+ }
368
+ if (settings.critic_model && domManager.getElement('criticModelSelect')) {
369
+ domManager.getElement('criticModelSelect').value = settings.critic_model;
370
+ }
371
+ // Handle global rules (persistent across sessions)
372
+ if (settings.global_rules !== undefined && domManager.getElement('customRulesSettings')) {
373
+ domManager.getElement('customRulesSettings').value = settings.global_rules;
374
+ }
375
+
376
+ // Handle legacy custom_rules field for backward compatibility
377
+ if (settings.custom_rules !== undefined && settings.global_rules === undefined) {
378
+ if (domManager.getElement('customRulesSettings')) {
379
+ domManager.getElement('customRulesSettings').value = settings.custom_rules;
380
+ }
381
+ }
382
+
383
+ // Per-session rules (navbar) are NOT loaded from storage - they reset with each session
384
+
385
+ Logger.debug('Settings', 'Settings updated programmatically');
386
+ }
387
+
388
+ // Load user-selected settings (e.g., preferred model) from storage and apply them
389
+ loadUserSettingsFromStorage() {
390
+ try {
391
+ const settings = storageManager.loadUserSettings();
392
+ if (settings && Object.keys(settings).length > 0) {
393
+ // Load all settings including PIPS mode
394
+ this.updateSettings(settings);
395
+ Logger.debug('Settings', 'User settings loaded from storage');
396
+ }
397
+ } catch (error) {
398
+ Logger.error('Settings', 'Error loading user settings from storage:', error);
399
+ }
400
+ }
401
+
402
+ // Clear per-session rules (called when starting a new session)
403
+ clearPerSessionRules() {
404
+ const navbarElement = domManager.getElement('customRules');
405
+ if (navbarElement) {
406
+ navbarElement.value = '';
407
+ Logger.debug('Settings', 'Per-session custom rules cleared for new session');
408
+ }
409
+ }
410
+
411
+ // Get combined rules for sending to backend
412
+ getCombinedRulesForBackend() {
413
+ const globalRules = domManager.getElement('customRulesSettings')?.value?.trim() || '';
414
+ const sessionRules = domManager.getElement('customRules')?.value?.trim() || '';
415
+
416
+ // Combine global and session rules
417
+ const rules = [];
418
+ if (globalRules) {
419
+ rules.push(`Global Rules:\n${globalRules}`);
420
+ }
421
+ if (sessionRules) {
422
+ rules.push(`Session Rules:\n${sessionRules}`);
423
+ }
424
+
425
+ const combined = rules.join('\n\n');
426
+
427
+ Logger.debug('Settings', 'Combined rules for backend:', {
428
+ global: globalRules,
429
+ session: sessionRules,
430
+ combined: combined
431
+ });
432
+
433
+ return combined;
434
+ }
435
+
436
+ // Clear all sessions from the settings panel
437
+ clearAllSessions() {
438
+ console.log('[DEBUG] clearAllSessions method called');
439
+ if (confirm('Are you sure you want to permanently delete ALL session history? This action cannot be undone.')) {
440
+ try {
441
+ console.log('[DEBUG] User confirmed, clearing sessions');
442
+
443
+ // Clear storage directly
444
+ storageManager.clearAllSessions();
445
+
446
+ // Clear any current session state if accessible
447
+ if (window.appState) {
448
+ window.appState.selectedSessionId = null;
449
+ window.appState.currentSessionData = null;
450
+ }
451
+
452
+ // Clear UI elements
453
+ const sessionsList = document.getElementById('sessionsList');
454
+ if (sessionsList) {
455
+ sessionsList.innerHTML = '';
456
+ }
457
+
458
+ // Clear inputs
459
+ const questionInput = document.getElementById('questionInput');
460
+ if (questionInput) {
461
+ questionInput.value = '';
462
+ }
463
+
464
+ // Clear image
465
+ const imagePreview = document.getElementById('imagePreview');
466
+ if (imagePreview) {
467
+ imagePreview.style.display = 'none';
468
+ imagePreview.src = '';
469
+ }
470
+
471
+ // Clear chat area
472
+ const chatArea = document.getElementById('chatArea');
473
+ if (chatArea) {
474
+ chatArea.innerHTML = `
475
+ <div class="chat-message">
476
+ <div class="message-header">
477
+ <div class="message-avatar avatar-pips">P</div>
478
+ <span class="message-sender">PIPS System</span>
479
+ </div>
480
+ <div class="message-content">
481
+ Welcome to PIPS! Enter a problem in the left panel and click "Solve Problem" to get started.
482
+ Don't forget to configure your model settings first.
483
+ </div>
484
+ </div>
485
+ `;
486
+ }
487
+
488
+ domManager.updateStatus('All sessions cleared successfully', 'success');
489
+ Logger.debug('Settings', 'All sessions cleared from settings panel');
490
+ console.log('[DEBUG] All sessions cleared successfully');
491
+
492
+ } catch (error) {
493
+ console.error('[DEBUG] Error clearing sessions:', error);
494
+ Logger.error('Settings', 'Error clearing sessions from settings:', error);
495
+ domManager.updateStatus('Error clearing sessions', 'error');
496
+ }
497
+ } else {
498
+ console.log('[DEBUG] User cancelled session clearing');
499
+ }
500
+ }
501
+ }
502
+
503
+ // Create singleton instance
504
+ export const settingsManager = new SettingsManager();
src/pips/templates/index_modular.html ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>PIPS - Per-Instance Program Synthesis</title>
7
+
8
+ <!-- External Dependencies -->
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.2/socket.io.js"></script>
10
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
11
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/prism-core.min.js"></script>
12
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/plugins/autoloader/prism-autoloader.min.js"></script>
13
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism-tomorrow.min.css" rel="stylesheet">
14
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700;800;900&family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
15
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/feather-icons/4.29.1/feather.min.css" rel="stylesheet">
16
+
17
+ <!-- Modular CSS -->
18
+ <link href="{{ url_for('static', filename='css/main.css') }}" rel="stylesheet">
19
+ <link href="{{ url_for('static', filename='css/components/feedback.css') }}" rel="stylesheet">
20
+ </head>
21
+ <body>
22
+ <div class="container">
23
+ <!-- Left Panel - Input -->
24
+ <div class="left-panel">
25
+ <div class="header">
26
+ <h1>PIPS</h1>
27
+ <p>Per-Instance Program Synthesis</p>
28
+ </div>
29
+
30
+ <div class="input-section">
31
+ <!-- New Session Button -->
32
+ <div class="form-group">
33
+ <button id="newSessionBtn" class="btn btn-outline">
34
+ <i data-feather="plus" style="width: 16px; height: 16px;"></i>
35
+ Start New Session
36
+ </button>
37
+ </div>
38
+
39
+ <!-- PIPS Mode Selection (iOS-style switch) -->
40
+ <div class="form-group">
41
+ <label for="questionInput" class="form-label">Problem Description</label>
42
+ <textarea id="questionInput" placeholder="Enter your problem here... (e.g., 'What is the square root of 144?', 'Solve this math puzzle', etc.)"></textarea>
43
+ </div>
44
+
45
+ <!-- PIPS Mode Selection (iOS-style switch) -->
46
+ <div class="form-group">
47
+ <label class="form-label">PIPS Mode</label>
48
+ <div class="ios-switch-container">
49
+ <div class="ios-switch-wrapper">
50
+ <input type="checkbox" id="pipsModeSwitch" class="ios-switch-input">
51
+ <label for="pipsModeSwitch" class="ios-switch-label">
52
+ <span class="ios-switch-slider">
53
+ <span class="ios-switch-text ios-switch-text-left">Interactive</span>
54
+ <span class="ios-switch-text ios-switch-text-right">Agent</span>
55
+ </span>
56
+ </label>
57
+ </div>
58
+ <div class="mode-description">
59
+ <span id="modeDescription">Automatic solving without user intervention</span>
60
+ </div>
61
+ </div>
62
+ <!-- Hidden radio buttons for backend compatibility -->
63
+ <div style="display: none;">
64
+ <input type="radio" name="pipsMode" value="AGENT" id="pipsModeAgent" checked>
65
+ <input type="radio" name="pipsMode" value="INTERACTIVE" id="pipsModeInteractive">
66
+ </div>
67
+ </div>
68
+
69
+ <!-- Custom Rules (for critic) -->
70
+ <div class="form-group">
71
+ <label for="customRules" class="form-label">
72
+ Per-Session Rules for AI Critic
73
+ <small class="form-help-inline">(optional)</small>
74
+ </label>
75
+ <textarea id="customRules"
76
+ class="custom-rules-input"
77
+ placeholder="Add rules specific to this session... (e.g., 'Must use only numpy functions for this problem', 'Optimize for memory usage', etc.)"
78
+ rows="3"></textarea>
79
+ <div class="form-help">These rules apply only to the current session and will be cleared when starting a new session</div>
80
+ </div>
81
+
82
+ <div class="form-group">
83
+ <label class="form-label">Attach Image (optional)</label>
84
+ <div class="image-upload">
85
+ <label for="imageInput" class="image-upload-btn">
86
+ <i data-feather="image" style="width: 16px; height: 16px;"></i>
87
+ Choose Image
88
+ </label>
89
+ <div class="image-upload-text">
90
+ Or drag and drop an image here
91
+ </div>
92
+ <input type="file" id="imageInput" accept="image/*">
93
+ <img id="imagePreview" class="image-preview" style="display: none;">
94
+ </div>
95
+ <div class="form-help">Upload an image to include visual context with your problem</div>
96
+ </div>
97
+
98
+ <!-- Sessions Management Section -->
99
+ <div class="form-group">
100
+ <div class="sessions-header">
101
+ <label class="form-label">
102
+ <i data-feather="clock" style="width: 16px; height: 16px; margin-right: 8px;"></i>
103
+ Session History
104
+ </label>
105
+ <button type="button" class="sessions-toggle" id="sessionsToggle">
106
+ <i data-feather="chevron-down" style="width: 16px; height: 16px;"></i>
107
+ </button>
108
+ </div>
109
+ <div class="sessions-container" id="sessionsContainer">
110
+ <div class="sessions-list" id="sessionsList">
111
+ <!-- Sessions will be dynamically populated -->
112
+ </div>
113
+ <div class="sessions-actions">
114
+ <button type="button" class="btn-session-action" id="clearSessionsBtn">
115
+ <i data-feather="trash-2" style="width: 14px; height: 14px;"></i>
116
+ Clear All
117
+ </button>
118
+ <button type="button" class="btn-session-action" id="importSessionsBtn">
119
+ <i data-feather="upload" style="width: 14px; height: 14px;"></i>
120
+ Import
121
+ </button>
122
+ <button type="button" class="btn-session-action" id="exportSessionsBtn">
123
+ <i data-feather="download" style="width: 14px; height: 14px;"></i>
124
+ Export
125
+ </button>
126
+ <input type="file" id="importSessionsInput" accept=".json" style="display: none;">
127
+ </div>
128
+ </div>
129
+ <div class="form-help">Switch between past and current problem-solving sessions</div>
130
+ </div>
131
+
132
+ <div class="button-group">
133
+ <button id="solveBtn" class="btn btn-primary">
134
+ <i data-feather="play" style="width: 16px; height: 16px;"></i>
135
+ Solve Problem
136
+ </button>
137
+ <button id="interruptBtn" class="btn btn-danger" style="display: none;">
138
+ <i data-feather="stop-circle" style="width: 16px; height: 16px;"></i>
139
+ Stop
140
+ </button>
141
+ <button id="settingsBtn" class="btn btn-secondary">
142
+ <i data-feather="settings" style="width: 16px; height: 16px;"></i>
143
+ Settings
144
+ </button>
145
+ </div>
146
+ </div>
147
+ </div>
148
+
149
+ <!-- Right Panel - Chat -->
150
+ <div class="right-panel">
151
+ <div class="status-bar" id="statusIndicator">
152
+ Ready to solve problems
153
+ </div>
154
+
155
+ <div class="progress-container" id="progressContainer">
156
+ <div class="progress-bar">
157
+ <div class="progress-fill" id="progressFill"></div>
158
+ </div>
159
+ </div>
160
+
161
+ <div class="chat-area" id="chatArea">
162
+ <div class="chat-message">
163
+ <div class="message-header">
164
+ <div class="message-avatar avatar-pips">P</div>
165
+ <span class="message-sender">PIPS System</span>
166
+ </div>
167
+ <div class="message-content">
168
+ Welcome to PIPS! Enter a problem in the left panel and click "Solve Problem" to get started.
169
+ Don't forget to configure your model settings first.
170
+ <br><br>
171
+ <strong>New to PIPS?</strong> Check out the curated example sessions in the Session History panel on the left.
172
+ </div>
173
+ </div>
174
+ </div>
175
+
176
+ <!-- Container for interactive feedback panels and final artifacts -->
177
+ <div class="chat-container" id="chat-container"></div>
178
+
179
+ <div class="footer">
180
+ <div class="footer-info">
181
+ <span id="sessionInfo">Session: Not connected</span>
182
+ </div>
183
+ <button id="downloadBtn" class="btn btn-secondary">
184
+ <i data-feather="file-text" style="width: 16px; height: 16px;"></i>
185
+ Download PDF
186
+ </button>
187
+ </div>
188
+ </div>
189
+ </div>
190
+
191
+ <!-- Settings Modal -->
192
+ <div id="settingsModal" class="modal">
193
+ <div class="modal-content">
194
+ <div class="modal-header">
195
+ <h2 class="modal-title">Settings</h2>
196
+ <span class="close">&times;</span>
197
+ </div>
198
+
199
+ <div class="modal-body">
200
+ <!-- Settings Tabs -->
201
+ <div class="settings-tabs">
202
+ <button type="button" class="tab-button active" data-tab="general">General</button>
203
+ <button type="button" class="tab-button" data-tab="models">Models</button>
204
+ <button type="button" class="tab-button" data-tab="rules">Custom Rules</button>
205
+ </div>
206
+
207
+ <form id="settingsForm">
208
+ <!-- General Settings Tab -->
209
+ <div class="tab-content active" id="general-tab">
210
+ <div class="form-group">
211
+ <label for="openaiApiKeyInput">OpenAI API Key</label>
212
+ <input type="password" id="openaiApiKeyInput" placeholder="Enter your OpenAI API key (for GPT models)">
213
+ </div>
214
+
215
+ <div class="form-group">
216
+ <label for="googleApiKeyInput">Google API Key</label>
217
+ <input type="password" id="googleApiKeyInput" placeholder="Enter your Google API key (for Gemini models)">
218
+ </div>
219
+
220
+ <div class="form-group">
221
+ <label for="anthropicApiKeyInput">Anthropic API Key</label>
222
+ <input type="password" id="anthropicApiKeyInput" placeholder="Enter your Anthropic API key (for Claude models)">
223
+ </div>
224
+
225
+ <div class="form-group">
226
+ <label>Problem Solving Method</label>
227
+ <div class="form-help">
228
+ PIPS now selects between iterative code generation and chain-of-thought reasoning automatically for each problem.
229
+ </div>
230
+ </div>
231
+
232
+ <div class="form-group">
233
+ <label for="maxIterations">Max Iterations</label>
234
+ <input type="number" id="maxIterations" min="1" max="20" value="{{ default_settings.max_iterations }}">
235
+ </div>
236
+
237
+ <div class="form-group">
238
+ <label for="temperature">Temperature</label>
239
+ <input type="number" id="temperature" min="0" max="2" step="0.1" value="{{ default_settings.temperature }}">
240
+ </div>
241
+
242
+ <div class="form-group">
243
+ <label for="maxTokens">Max Tokens</label>
244
+ <input type="number" id="maxTokens" min="512" max="32768" step="512" value="{{ default_settings.max_tokens }}">
245
+ </div>
246
+
247
+ <div class="form-group">
248
+ <label for="maxExecutionTime">Max Code Execution Time (seconds)</label>
249
+ <input type="number" id="maxExecutionTime" min="1" max="60" value="{{ default_settings.max_execution_time }}">
250
+ <small class="form-help">Maximum time to wait for code execution before timing out</small>
251
+ </div>
252
+
253
+ <div class="form-group">
254
+ <label>Session Management</label>
255
+ <button type="button" id="clearAllSessionsBtn" class="btn btn-danger" style="width: 100%;">
256
+ <i data-feather="trash-2" style="width: 16px; height: 16px;"></i>
257
+ Clear All Sessions
258
+ </button>
259
+ <small class="form-help">Permanently delete all session history and chat data</small>
260
+ </div>
261
+ </div>
262
+
263
+ <!-- Models Settings Tab -->
264
+ <div class="tab-content" id="models-tab">
265
+ <div class="form-group">
266
+ <label for="generatorModelSelect">Code Generator Model</label>
267
+ <select id="generatorModelSelect">
268
+ {% for model_id, model_name in available_models.items() %}
269
+ <option value="{{ model_id }}" {% if model_id == default_settings.model %}selected{% endif %}>
270
+ {{ model_name }}
271
+ </option>
272
+ {% endfor %}
273
+ </select>
274
+ <small class="form-help">Model used for generating code solutions</small>
275
+ </div>
276
+
277
+ <div class="form-group">
278
+ <label for="criticModelSelect">Code Critic Model</label>
279
+ <select id="criticModelSelect">
280
+ {% for model_id, model_name in available_models.items() %}
281
+ <option value="{{ model_id }}" {% if model_id == default_settings.model %}selected{% endif %}>
282
+ {{ model_name }}
283
+ </option>
284
+ {% endfor %}
285
+ </select>
286
+ <small class="form-help">Model used for reviewing and critiquing generated code</small>
287
+ </div>
288
+
289
+ <div class="form-help">
290
+ <p><strong>Tip:</strong> You can use different models for generation and criticism. For example, use a fast model for generation and a more powerful model for criticism.</p>
291
+ </div>
292
+ </div>
293
+
294
+ <!-- Custom Rules Tab -->
295
+ <div class="tab-content" id="rules-tab">
296
+ <div class="form-group">
297
+ <label for="customRulesSettings">Global Rules for AI Critic</label>
298
+ <textarea id="customRulesSettings"
299
+ class="custom-rules-input"
300
+ rows="8"
301
+ placeholder="Add global rules that apply to all sessions...
302
+
303
+ Examples:
304
+ • Must use only numpy functions for mathematical operations
305
+ • Code should be optimized for performance over readability
306
+ • Include comprehensive error handling
307
+ • Follow PEP 8 style guidelines strictly
308
+ • Use type hints for all function parameters"></textarea>
309
+ <small class="form-help">These global rules persist across all sessions and are combined with any per-session rules you specify in the main interface.</small>
310
+ </div>
311
+
312
+ <div class="form-help">
313
+ <p><strong>How it works:</strong> Global rules are saved and applied to all sessions, while per-session rules (from the main interface) apply only to the current session.</p>
314
+ <p>The critic will use both types of rules along with its built-in knowledge when evaluating generated code.</p>
315
+ </div>
316
+ </div>
317
+
318
+ <button type="submit" class="btn btn-primary" style="width: 100%;">
319
+ <i data-feather="check" style="width: 16px; height: 16px;"></i>
320
+ Save Settings
321
+ </button>
322
+ </form>
323
+ </div>
324
+ </div>
325
+ </div>
326
+
327
+ <!-- Feather Icons -->
328
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/feather-icons/4.29.1/feather.min.js"></script>
329
+
330
+ <!-- PDF Generation -->
331
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>
332
+
333
+ <!-- Modular JavaScript -->
334
+ <script src="{{ url_for('static', filename='js/ui/interactive-feedback.js') }}"></script>
335
+ <script type="module" src="{{ url_for('static', filename='js/main.js') }}"></script>
336
+ </body>
337
+ </html>
src/pips/utils.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions and data structures for PIPS.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Optional
7
+ from io import BytesIO
8
+ import base64
9
+ import contextlib
10
+ import multiprocessing
11
+ import timeout_decorator
12
+ from io import StringIO
13
+ from contextlib import redirect_stdout
14
+ from PIL import Image
15
+
16
+
17
+ @dataclass
18
+ class RawInput:
19
+ """Dataclass to store raw input for a function."""
20
+ image_input: Optional[Image.Image]
21
+ text_input: Optional[str]
22
+
23
+
24
+ def img2base64(img):
25
+ """Convert PIL Image to base64 string."""
26
+ buffer = BytesIO()
27
+ if img.mode != "RGB":
28
+ img = img.convert("RGB")
29
+
30
+ # if width or height < 28, resize it keeping aspect ratio
31
+ if img.width < 28 or img.height < 28:
32
+ # make smallest dimension 28
33
+ new_width = 28
34
+ new_height = 28
35
+ if img.width < img.height:
36
+ new_height = int((28 / img.width) * img.height)
37
+ else:
38
+ new_width = int((28 / img.height) * img.width)
39
+ img = img.resize((new_width, new_height))
40
+
41
+ img.save(buffer, format="JPEG")
42
+ return base64.b64encode(buffer.getvalue()).decode()
43
+
44
+
45
+ def base642img(base64_str):
46
+ """Convert base64 string to PIL Image."""
47
+ imgdata = base64.b64decode(base64_str)
48
+ return Image.open(BytesIO(imgdata))
49
+
50
+
51
+ @timeout_decorator.timeout(0.5)
52
+ def my_exec(code, locs):
53
+ exec(code, locs, locs)
54
+
55
+
56
+ def run_with_timeout(code, timeout, code_context=None):
57
+ """Execute code with timeout and capture output."""
58
+ def target(queue):
59
+ locs = {} # Standard dictionary for local variables
60
+ locs["__name__"] = "__main__"
61
+ try:
62
+ if code_context:
63
+ exec(code_context, locs, locs)
64
+ except Exception as e:
65
+ pass
66
+
67
+ try:
68
+ # store stdout in a variable
69
+ f = StringIO()
70
+ with redirect_stdout(f):
71
+ exec(code, locs, locs) # Execute the code with locs as locals
72
+ if "answer" in locs:
73
+ queue.put((locs.get("answer", None), f.getvalue())) # Retrieve the value of "answer"
74
+ else:
75
+ queue.put((None, f.getvalue())) # Retrieve the output
76
+ except Exception as e:
77
+ queue.put((f"Error: {e}", f.getvalue()))
78
+
79
+ queue = multiprocessing.Queue() # Queue for communication
80
+ process = multiprocessing.Process(target=target, args=(queue,))
81
+ process.start()
82
+ process.join(timeout)
83
+
84
+ if process.is_alive():
85
+ process.terminate()
86
+ process.join()
87
+ return None, "", "Error: Code execution timed out"
88
+
89
+ # Retrieve result from the queue
90
+ if not queue.empty():
91
+ result = queue.get()
92
+ answer, stdout = result[0], result[1]
93
+ # Check if the answer indicates an error
94
+ if isinstance(answer, str) and answer.startswith("Error:"):
95
+ return None, stdout, answer # Return error as the third element
96
+ else:
97
+ return answer, stdout, None # No error
98
+ return None, "", None
99
+
100
+
101
+ def python_eval(code: str, code_context: str = None, max_execution_time: int = 5):
102
+ """Evaluate Python code and return the result."""
103
+ try:
104
+ if "if __name__ == '__main__'" in code:
105
+ code = code.replace(
106
+ "if __name__ == '__main__':\n main()",
107
+ " return answer\nif __name__ == '__main__':\n answer = main()",
108
+ )
109
+ code = code.replace(
110
+ 'if __name__ == "__main__":\n main()',
111
+ " return answer\nif __name__ == '__main__':\n answer = main()",
112
+ )
113
+ code = "answer = None\n" + code
114
+ if "main():" in code:
115
+ code += "\nmain()"
116
+
117
+ return run_with_timeout(code, max_execution_time, code_context)
118
+ except Exception as e:
119
+ print("Exception:", e)
120
+ return "None", "", str(e)
121
+
122
+
123
+ def eval_extracted_code(code):
124
+ """Evaluate extracted code and return the answer."""
125
+ try:
126
+ locs = {'__name__': '__main__'}
127
+ with contextlib.redirect_stdout(None):
128
+ exec(code, locs, locs)
129
+ return locs["answer"]
130
+ except Exception as e:
131
+ return "None"
src/pips/web_app.py ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Flask-SocketIO server for the PIPS front-end.
3
+
4
+ Matches the JS events used in index.html:
5
+ • session_connected
6
+ • settings_updated
7
+ • solving_started / step_update / llm_streaming_* / code_execution_* / code_check
8
+ • solving_complete / solving_error / solving_interrupted
9
+ • heartbeat_response
10
+ • download_chat_log
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import threading
18
+ import time
19
+ from datetime import datetime
20
+ from typing import Any, Dict
21
+
22
+ from flask import Flask, render_template, request, jsonify
23
+ from flask_socketio import SocketIO, emit
24
+
25
+ # ─── project modules ────────────────────────────────────────────────────────────
26
+ from .models import AVAILABLE_MODELS, get_model
27
+ from .core import PIPSSolver, PIPSMode
28
+ from .utils import RawInput, base642img
29
+ # ────────────────────────────────────────────────────────────────────────────────
30
+
31
+ # ---------------------------------------------------------------------
32
+ # basic app setup
33
+ # ---------------------------------------------------------------------
34
+ app = Flask(__name__, template_folder="templates")
35
+ app.config["SECRET_KEY"] = "change-me" # ← customise for prod
36
+ socketio = SocketIO(app, cors_allowed_origins="*")
37
+
38
+ # ---------------------------------------------------------------------
39
+ # server-side session state
40
+ # ---------------------------------------------------------------------
41
+ DEFAULT_SETTINGS = dict(
42
+ model = next(iter(AVAILABLE_MODELS)), # first model id
43
+ openai_api_key = "",
44
+ google_api_key = "",
45
+ anthropic_api_key = "",
46
+ max_iterations = 8,
47
+ temperature = 0.0,
48
+ max_tokens = 4096,
49
+ max_execution_time = 10,
50
+ # New interactive mode settings
51
+ pips_mode = "AGENT", # or "INTERACTIVE"
52
+ generator_model = next(iter(AVAILABLE_MODELS)), # can be different from critic
53
+ critic_model = next(iter(AVAILABLE_MODELS)), # can be different from generator
54
+ custom_rules = "", # textarea value
55
+ prompt_overrides = {}, # persisted user edits keyed by prompt-id
56
+ )
57
+
58
+ sessions: Dict[str, Dict[str, Any]] = {}
59
+ active_tasks: Dict[str, Dict[str, Any]] = {}
60
+
61
+ def _safe(obj):
62
+ """JSON-serialise anything (fractions etc. become strings)."""
63
+ if obj is None or isinstance(obj, (str, int, float, bool)):
64
+ return obj
65
+ if isinstance(obj, list):
66
+ return [_safe(x) for x in obj]
67
+ if isinstance(obj, dict):
68
+ return {k: _safe(v) for k, v in obj.items()}
69
+ return str(obj)
70
+
71
+
72
+ def make_callbacks(sid: str, generator_model_name: str, critic_model_name: str, stop_evt: threading.Event, max_exec: int):
73
+ """Build the callbacks dict required by PIPSSolver (stream=True)."""
74
+
75
+ def _emit(event: str, payload: dict):
76
+ # Force immediate emission without buffering
77
+ if event == "llm_streaming_token":
78
+ print(f"[DEBUG] Emitting token for session {sid}: '{payload.get('token', '')[:20]}...'")
79
+ elif event == "code_check_streaming_token":
80
+ print(f"[DEBUG] Emitting code reviewer token for session {sid}: '{payload.get('token', '')[:20]}...'")
81
+ else:
82
+ print(f"[DEBUG] Emitting {event} for session {sid}")
83
+ socketio.emit(event, payload, room=sid)
84
+ # Force flush the socket
85
+ socketio.sleep(0) # This forces Flask-SocketIO to flush immediately
86
+
87
+ cb = dict(
88
+ # progress
89
+ on_step_update=lambda step, msg, iteration=None, prompt_details=None, **_: _emit(
90
+ "step_update", dict(step=step, message=msg, iteration=iteration, prompt_details=prompt_details)
91
+ ),
92
+
93
+ # streaming
94
+ on_llm_streaming_start=lambda it, m: _emit(
95
+ "llm_streaming_start", dict(iteration=it, model_name=generator_model_name)
96
+ ),
97
+ on_llm_streaming_token=lambda tok, it, m: _emit(
98
+ "llm_streaming_token", dict(token=tok, iteration=it, model_name=generator_model_name)
99
+ ),
100
+ on_llm_streaming_end=lambda it, m: _emit(
101
+ "llm_streaming_end", dict(iteration=it, model_name=generator_model_name)
102
+ ),
103
+
104
+ # code reviewer streaming
105
+ on_code_check_streaming_start=lambda it, m: _emit(
106
+ "code_check_streaming_start", dict(iteration=it, model_name=critic_model_name)
107
+ ),
108
+ on_code_check_streaming_token=lambda tok, it, m: _emit(
109
+ "code_check_streaming_token", dict(token=tok, iteration=it, model_name=critic_model_name)
110
+ ),
111
+ on_code_check_streaming_end=lambda it, m: _emit(
112
+ "code_check_streaming_end", dict(iteration=it, model_name=critic_model_name)
113
+ ),
114
+
115
+ # code execution lifecycle
116
+ on_code_execution_start=lambda it: _emit(
117
+ "code_execution_start", dict(iteration=it)
118
+ ),
119
+ on_code_execution_end=lambda it: _emit(
120
+ "code_execution_end", dict(iteration=it)
121
+ ),
122
+ on_code_execution=lambda it, out, stdout, err: _emit(
123
+ "code_execution",
124
+ dict(iteration=it, output=str(out), stdout=stdout, error=err),
125
+ ),
126
+
127
+ # Legacy on_code_check callback removed - now using streaming only
128
+
129
+ on_error=lambda msg: _emit("solving_error", dict(error=msg)),
130
+
131
+ # interruption / limits
132
+ check_interrupted=stop_evt.is_set,
133
+ get_max_execution_time=lambda: max_exec,
134
+
135
+ # interactive mode callback
136
+ on_waiting_for_user=lambda iteration, critic_text, code, symbols: _emit(
137
+ "awaiting_user_feedback",
138
+ dict(iteration=iteration, critic_text=critic_text, code=code, symbols=_safe(symbols))
139
+ ),
140
+ )
141
+ return cb
142
+
143
+
144
+ # ========== routes =================================================================
145
+
146
+ @app.route("/")
147
+ def index():
148
+ return render_template(
149
+ "index_modular.html",
150
+ available_models=AVAILABLE_MODELS,
151
+ default_settings=DEFAULT_SETTINGS,
152
+ )
153
+
154
+
155
+ # ========== socket events ===========================================================
156
+
157
+ @socketio.on("connect")
158
+ def on_connect():
159
+ sid = request.sid
160
+ sessions[sid] = dict(settings=DEFAULT_SETTINGS.copy(), chat=[])
161
+ emit("session_connected", {"session_id": sid})
162
+ print(f"[CONNECT] {sid}")
163
+
164
+
165
+ @socketio.on("disconnect")
166
+ def on_disconnect():
167
+ sid = request.sid
168
+ if sid in active_tasks:
169
+ active_tasks[sid]["event"].set()
170
+ active_tasks.pop(sid, None)
171
+ sessions.pop(sid, None)
172
+ print(f"[DISCONNECT] {sid}")
173
+
174
+
175
+ @socketio.on("update_settings")
176
+ def on_update_settings(data):
177
+ sid = request.sid
178
+ if sid not in sessions:
179
+ emit("settings_updated", {"status": "error", "message": "No session"})
180
+ return
181
+
182
+ sessions[sid]["settings"].update(data)
183
+ emit("settings_updated", {"status": "success", "settings": sessions[sid]["settings"]})
184
+
185
+
186
+ @socketio.on("solve_problem")
187
+ def on_solve_problem(data):
188
+ sid = request.sid
189
+ if sid not in sessions:
190
+ emit("solving_error", {"error": "Session vanished"})
191
+ return
192
+
193
+ text = (data.get("text") or "").strip()
194
+ if not text:
195
+ emit("solving_error", {"error": "Problem text is empty"})
196
+ return
197
+
198
+ img_b64 = data.get("image")
199
+ img = None
200
+ if img_b64 and img_b64.startswith("data:image"):
201
+ try:
202
+ img = base642img(img_b64.split(",", 1)[1])
203
+ except Exception as e:
204
+ emit("solving_error", {"error": f"Bad image: {e}"})
205
+ return
206
+
207
+ settings = sessions[sid]["settings"]
208
+ generator_model_id = settings.get("generator_model", settings["model"])
209
+ critic_model_id = settings.get("critic_model", settings["model"])
210
+ pips_mode = settings.get("pips_mode", "AGENT")
211
+ # Handle both new format (global_rules + session_rules) and legacy format (custom_rules)
212
+ global_rules = settings.get("global_rules", "")
213
+ session_rules = settings.get("session_rules", "")
214
+ legacy_custom_rules = settings.get("custom_rules", "")
215
+
216
+ # Combine rules for the critic
217
+ combined_rules = []
218
+ if global_rules:
219
+ combined_rules.append(f"Global Rules:\n{global_rules}")
220
+ if session_rules:
221
+ combined_rules.append(f"Session Rules:\n{session_rules}")
222
+ if legacy_custom_rules and not global_rules and not session_rules:
223
+ # Backward compatibility
224
+ combined_rules.append(legacy_custom_rules)
225
+
226
+ custom_rules = "\n\n".join(combined_rules)
227
+
228
+ print(f"[DEBUG] Custom rules processing for session {sid}:")
229
+ print(f" Global rules: {repr(global_rules)}")
230
+ print(f" Session rules: {repr(session_rules)}")
231
+ print(f" Legacy rules: {repr(legacy_custom_rules)}")
232
+ print(f" Combined rules: {repr(custom_rules)}")
233
+
234
+ # Helper function to get API key for a model
235
+ def get_api_key_for_model(model_id):
236
+ if any(model_id.startswith(model) for model in ["gpt", "o3", "o4"]):
237
+ return settings.get("openai_api_key")
238
+ elif "gemini" in model_id:
239
+ return settings.get("google_api_key")
240
+ elif "claude" in model_id:
241
+ return settings.get("anthropic_api_key")
242
+ return None
243
+
244
+ # Validate API key for generator model upfront
245
+ generator_api_key = get_api_key_for_model(generator_model_id)
246
+ critic_api_key = get_api_key_for_model(critic_model_id)
247
+
248
+ if not generator_api_key:
249
+ emit("solving_error", {"error": f"API key missing for generator model: {generator_model_id}"})
250
+ return
251
+
252
+ stop_evt = threading.Event()
253
+
254
+ def task():
255
+ try:
256
+ print(f"[DEBUG] Starting solving task for session {sid}")
257
+
258
+ sample = RawInput(text_input=text, image_input=img)
259
+
260
+ # Instantiate generator model
261
+ generator_model = get_model(generator_model_id, generator_api_key)
262
+
263
+ cbs = make_callbacks(
264
+ sid, generator_model_id, critic_model_id, stop_evt, settings["max_execution_time"]
265
+ )
266
+
267
+ print(f"[DEBUG] Emitting solving_started for session {sid}")
268
+ socketio.emit("solving_started", {}, room=sid)
269
+ socketio.sleep(0) # Force flush
270
+
271
+ critic_model = generator_model
272
+ if critic_model_id != generator_model_id:
273
+ if critic_api_key:
274
+ critic_model = get_model(critic_model_id, critic_api_key)
275
+ else:
276
+ print(f"[DEBUG] Critic API key missing for {critic_model_id}; falling back to generator model for criticism.")
277
+
278
+ requested_interactive = (pips_mode == "INTERACTIVE")
279
+ solver = PIPSSolver(
280
+ generator_model,
281
+ max_iterations=settings["max_iterations"],
282
+ temperature=settings["temperature"],
283
+ max_tokens=settings["max_tokens"],
284
+ interactive=requested_interactive,
285
+ critic_model=critic_model,
286
+ )
287
+
288
+ decision_max_tokens = min(1024, settings["max_tokens"])
289
+ answer, logs, mode_decision_summary = solver.solve(
290
+ sample,
291
+ stream=True,
292
+ callbacks=cbs,
293
+ additional_rules=custom_rules,
294
+ decision_max_tokens=decision_max_tokens,
295
+ interactive_requested=requested_interactive,
296
+ )
297
+
298
+ use_code = mode_decision_summary.get("use_code", False)
299
+ if sid in sessions:
300
+ sessions[sid]["mode_decision"] = mode_decision_summary
301
+ print(
302
+ f"[DEBUG] Mode decision for session {sid}: "
303
+ f"use_code={use_code}, requested_interactive={requested_interactive}"
304
+ )
305
+
306
+ if use_code and critic_model_id != generator_model_id and not critic_api_key:
307
+ cbs["on_step_update"](
308
+ "mode_selection",
309
+ "Proceeding without a dedicated critic model because no API key was provided.",
310
+ iteration=None,
311
+ )
312
+
313
+ if use_code:
314
+ print(f"[DEBUG] Used iterative code path for session {sid}")
315
+ # If interactive mode returned early (waiting for user), store solver in session
316
+ if requested_interactive and not answer and solver._checkpoint:
317
+ if sid in sessions:
318
+ sessions[sid]["solver"] = solver
319
+ print(f"[DEBUG] Interactive mode - waiting for user feedback for session {sid}")
320
+ return
321
+ else:
322
+ print(f"[DEBUG] Used chain-of-thought path for session {sid}")
323
+
324
+ if stop_evt.is_set():
325
+ print(f"[DEBUG] Task was interrupted for session {sid}")
326
+ socketio.emit("solving_interrupted", {"message": "Interrupted"}, room=sid)
327
+ return
328
+
329
+ print(f"[DEBUG] Solving completed, emitting final answer for session {sid}")
330
+
331
+ if not isinstance(logs, dict) or logs is None:
332
+ logs = {} # ensure logs is a dict for augmentation
333
+ if isinstance(logs, dict):
334
+ logs.setdefault("mode_decision", mode_decision_summary)
335
+
336
+ # Extract final artifacts for display
337
+ latest_symbols = logs.get("all_symbols", [])[-1] if logs.get("all_symbols") else {}
338
+ latest_code = logs.get("all_programs", [])[-1] if logs.get("all_programs") else ""
339
+
340
+ # Emit final artifacts
341
+ socketio.emit("final_artifacts", {
342
+ "symbols": _safe(latest_symbols),
343
+ "code": latest_code
344
+ }, room=sid)
345
+
346
+ socketio.emit(
347
+ "solving_complete",
348
+ {
349
+ "final_answer": answer,
350
+ "logs": _safe(logs),
351
+ "method": "iterative_code" if use_code else "chain_of_thought",
352
+ },
353
+ room=sid,
354
+ )
355
+ if sid in sessions:
356
+ sessions[sid].pop("mode_decision", None)
357
+
358
+ except Exception as exc:
359
+ print(f"[DEBUG] Exception in solving task for session {sid}: {exc}")
360
+ if sid in sessions:
361
+ sessions[sid].pop("mode_decision", None)
362
+ socketio.emit("solving_error", {"error": str(exc)}, room=sid)
363
+ finally:
364
+ print(f"[DEBUG] Cleaning up task for session {sid}")
365
+ active_tasks.pop(sid, None)
366
+
367
+ active_tasks[sid] = dict(event=stop_evt, task=socketio.start_background_task(task))
368
+
369
+
370
+ @socketio.on("interrupt_solving")
371
+ def on_interrupt(data=None):
372
+ sid = request.sid
373
+ if sid in active_tasks:
374
+ active_tasks[sid]["event"].set()
375
+ emit("solving_interrupted", {"message": "Stopped."})
376
+ else:
377
+ emit("solving_interrupted", {"message": "No active task."})
378
+
379
+
380
+ @socketio.on("provide_feedback")
381
+ def on_provide_feedback(data):
382
+ """Handle user feedback in interactive mode."""
383
+ sid = request.sid
384
+ if sid not in sessions:
385
+ emit("solving_error", {"error": "Session vanished"})
386
+ return
387
+
388
+ solver = sessions[sid].get("solver")
389
+ if not solver or not solver._checkpoint:
390
+ emit("solving_error", {"error": "No interactive session waiting for feedback"})
391
+ return
392
+
393
+ # Extract user feedback
394
+ user_feedback = {
395
+ "accept_critic": data.get("accept_critic", True),
396
+ "extra_comments": data.get("extra_comments", ""),
397
+ "quoted_ranges": data.get("quoted_ranges", []),
398
+ "terminate": data.get("terminate", False)
399
+ }
400
+
401
+ def continue_task():
402
+ try:
403
+ print(f"[DEBUG] Continuing interactive task with user feedback for session {sid}")
404
+
405
+ # Continue from checkpoint with user feedback
406
+ answer, logs = solver.continue_from_checkpoint(user_feedback)
407
+
408
+ mode_decision = sessions[sid].get("mode_decision") or getattr(solver, "_mode_decision_summary", None)
409
+ if not isinstance(logs, dict) or logs is None:
410
+ logs = {}
411
+ if isinstance(logs, dict) and mode_decision:
412
+ logs.setdefault("mode_decision", mode_decision)
413
+
414
+ # Extract final artifacts
415
+ latest_symbols = logs.get("all_symbols", [])[-1] if logs.get("all_symbols") else {}
416
+ latest_code = logs.get("all_programs", [])[-1] if logs.get("all_programs") else ""
417
+
418
+ # Emit final artifacts
419
+ socketio.emit("final_artifacts", {
420
+ "symbols": _safe(latest_symbols),
421
+ "code": latest_code
422
+ }, room=sid)
423
+
424
+ # Emit completion
425
+ socketio.emit("solving_complete", {
426
+ "final_answer": answer,
427
+ "logs": _safe(logs),
428
+ "method": "iterative_code_interactive",
429
+ }, room=sid)
430
+ sessions[sid].pop("mode_decision", None)
431
+
432
+ except Exception as exc:
433
+ print(f"[DEBUG] Exception in interactive continuation for session {sid}: {exc}")
434
+ socketio.emit("solving_error", {"error": str(exc)}, room=sid)
435
+ if sid in sessions:
436
+ sessions[sid].pop("mode_decision", None)
437
+ finally:
438
+ # Clean up
439
+ if sid in sessions:
440
+ sessions[sid].pop("solver", None)
441
+ active_tasks.pop(sid, None)
442
+
443
+ # Start continuation task
444
+ active_tasks[sid] = dict(event=threading.Event(), task=socketio.start_background_task(continue_task))
445
+
446
+
447
+ @socketio.on("terminate_session")
448
+ def on_terminate_session(data=None):
449
+ """Handle user termination of interactive session."""
450
+ sid = request.sid
451
+ if sid not in sessions:
452
+ emit("solving_error", {"error": "Session vanished"})
453
+ return
454
+
455
+ solver = sessions[sid].get("solver")
456
+ if not solver or not solver._checkpoint:
457
+ emit("solving_error", {"error": "No interactive session to terminate"})
458
+ return
459
+
460
+ # Terminate with current state
461
+ user_feedback = {"terminate": True}
462
+
463
+ def terminate_task():
464
+ try:
465
+ print(f"[DEBUG] Terminating interactive task for session {sid}")
466
+
467
+ # Get final answer from checkpoint
468
+ answer, logs = solver.continue_from_checkpoint(user_feedback)
469
+
470
+ mode_decision = sessions[sid].get("mode_decision") or getattr(solver, "_mode_decision_summary", None)
471
+ if not isinstance(logs, dict) or logs is None:
472
+ logs = {}
473
+ if isinstance(logs, dict) and mode_decision:
474
+ logs.setdefault("mode_decision", mode_decision)
475
+
476
+ # Extract final artifacts
477
+ latest_symbols = logs.get("all_symbols", [])[-1] if logs.get("all_symbols") else {}
478
+ latest_code = logs.get("all_programs", [])[-1] if logs.get("all_programs") else ""
479
+
480
+ # Emit final artifacts
481
+ socketio.emit("final_artifacts", {
482
+ "symbols": _safe(latest_symbols),
483
+ "code": latest_code
484
+ }, room=sid)
485
+
486
+ # Emit completion
487
+ socketio.emit("solving_complete", {
488
+ "final_answer": answer,
489
+ "logs": _safe(logs),
490
+ "method": "iterative_code_interactive_terminated",
491
+ }, room=sid)
492
+ sessions[sid].pop("mode_decision", None)
493
+
494
+ except Exception as exc:
495
+ print(f"[DEBUG] Exception in interactive termination for session {sid}: {exc}")
496
+ socketio.emit("solving_error", {"error": str(exc)}, room=sid)
497
+ if sid in sessions:
498
+ sessions[sid].pop("mode_decision", None)
499
+ finally:
500
+ # Clean up
501
+ if sid in sessions:
502
+ sessions[sid].pop("solver", None)
503
+ active_tasks.pop(sid, None)
504
+
505
+ # Start termination task
506
+ active_tasks[sid] = dict(event=threading.Event(), task=socketio.start_background_task(terminate_task))
507
+
508
+
509
+ @socketio.on("switch_mode")
510
+ def on_switch_mode(data):
511
+ """Handle switching between AGENT and INTERACTIVE modes."""
512
+ sid = request.sid
513
+ if sid not in sessions:
514
+ emit("solving_error", {"error": "Session vanished"})
515
+ return
516
+
517
+ new_mode = data.get("mode", "AGENT")
518
+ if new_mode not in ["AGENT", "INTERACTIVE"]:
519
+ emit("solving_error", {"error": "Invalid mode"})
520
+ return
521
+
522
+ # Update session settings
523
+ sessions[sid]["settings"]["pips_mode"] = new_mode
524
+
525
+ emit("mode_switched", {"mode": new_mode})
526
+
527
+
528
+ @socketio.on("heartbeat")
529
+ def on_heartbeat(data):
530
+ emit("heartbeat_response", {"timestamp": data.get("timestamp"), "server_time": time.time()})
531
+
532
+
533
+ @socketio.on("download_chat_log")
534
+ def on_download_chat_log():
535
+ sid = request.sid
536
+ sess = sessions.get(sid)
537
+ if not sess:
538
+ emit("error", {"message": "Session missing"})
539
+ return
540
+
541
+ payload = dict(
542
+ session_id=sid,
543
+ timestamp=datetime.utcnow().isoformat(),
544
+ settings=_safe(sess["settings"]),
545
+ chat_history=_safe(sess["chat"]),
546
+ )
547
+ emit(
548
+ "chat_log_ready",
549
+ {
550
+ "filename": f"pips_chat_{sid[:8]}.json",
551
+ "content": json.dumps(payload, indent=2),
552
+ },
553
+ )
554
+
555
+
556
+ # ========== public runner ==========================================================
557
+
558
+ def run_app(host: str = "0.0.0.0", port: int = 8080, debug: bool = False):
559
+ os.makedirs("uploads", exist_ok=True) # if you later add upload support
560
+ socketio.run(app, host=host, port=port, debug=debug)
561
+
562
+
563
+ # ---------------------------------------------------------------------
564
+ if __name__ == "__main__": # script usage: python pips/web_app.py --port 5000
565
+ import argparse
566
+ ap = argparse.ArgumentParser()
567
+ ap.add_argument("--host", default="0.0.0.0")
568
+ ap.add_argument("--port", type=int, default=8080)
569
+ ap.add_argument("--debug", action="store_true")
570
+ args = ap.parse_args()
571
+ run_app(args.host, args.port, args.debug)