Arslan1997 committed · Commit 11794cc · 0 parents
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.
Files changed (50)
  1. .dockerignore +18 -0
  2. .env-template +15 -0
  3. .gitattributes +38 -0
  4. .gitignore +48 -0
  5. Dockerfile +34 -0
  6. Housing.csv +546 -0
  7. Procfile +1 -0
  8. README.md +10 -0
  9. agents_config.json +149 -0
  10. app.py +1589 -0
  11. docs/README.md +251 -0
  12. docs/api/README.md +23 -0
  13. docs/api/routes/analytics.md +562 -0
  14. docs/api/routes/chats.md +181 -0
  15. docs/api/routes/code.md +182 -0
  16. docs/api/routes/deep_analysis.md +348 -0
  17. docs/api/routes/feedback.md +153 -0
  18. docs/api/routes/session.md +273 -0
  19. docs/api/routes/templates.md +363 -0
  20. docs/architecture/architecture.md +427 -0
  21. docs/development/development_workflow.md +506 -0
  22. docs/getting_started.md +273 -0
  23. docs/system/database-schema.md +289 -0
  24. docs/system/shared_dataframe.md +91 -0
  25. docs/troubleshooting/troubleshooting.md +537 -0
  26. entrypoint_local.sh +175 -0
  27. images/AI snapshot-chat.png +3 -0
  28. images/Auto-Analyst Banner.png +3 -0
  29. images/Auto-analyst-poster.png +3 -0
  30. images/Auto-analysts icon small.png +3 -0
  31. images/auto-analyst logo.png +3 -0
  32. requirements.txt +62 -0
  33. scripts/__init__.py +0 -0
  34. scripts/format_response.py +1112 -0
  35. scripts/init_production_db.py +191 -0
  36. scripts/populate_agent_templates.py +508 -0
  37. scripts/tier_maker.py +86 -0
  38. src/__init__.py +0 -0
  39. src/agents/agents.py +0 -0
  40. src/agents/deep_agents.py +1085 -0
  41. src/agents/marketing_analytics_agents.py +75 -0
  42. src/agents/memory_agents.py +68 -0
  43. src/agents/retrievers/retrievers.py +153 -0
  44. src/db/__init__.py +0 -0
  45. src/db/init_db.py +68 -0
  46. src/db/schemas/__init__.py +0 -0
  47. src/db/schemas/models.py +237 -0
  48. src/managers/ai_manager.py +84 -0
  49. src/managers/chat_manager.py +944 -0
  50. src/managers/session_manager.py +437 -0
.dockerignore ADDED
@@ -0,0 +1,18 @@
+ __pycache__/
+ *.pyc
+ *.pyo
+ *.pyd
+ *.db
+ *.sqlite3
+ *.log
+ .env
+ venv/
+ data/
+ notebooks/
+ *.ipynb
+ .idea/
+ .vscode/
+ .DS_Store
+ # Exclude most JSON files but allow agents_config.json
+ *.json
+ !agents_config.json
.env-template ADDED
@@ -0,0 +1,15 @@
+ OPENAI_API_KEY=your-openai-api-key-here
+ MODEL_PROVIDER=openai # openai, groq, anthropic, gemini
+ MODEL_NAME=gpt-4o-mini
+ TEMPERATURE=0.7
+ MAX_TOKENS=6000
+ GROQ_API_KEY=your-groq-api-key-here
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
+ GEMINI_API_KEY=your-gemini-api-key-here
+
+ ADMIN_API_KEY=admin123
+
+ DATABASE_URL=sqlite:///chat_database.db
+ ENVIRONMENT="development"
+
+ FRONTEND_URL="http://localhost:3000/"
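For reference, a minimal sketch of how these settings might be read at startup (assuming the backend loads them with python-dotenv; the variable names mirror the template, but the loader itself is illustrative and not code from this commit):

import os
from dotenv import load_dotenv

load_dotenv()  # reads a .env file created from this template

provider = os.getenv("MODEL_PROVIDER", "openai")      # openai, groq, anthropic, gemini
model_name = os.getenv("MODEL_NAME", "gpt-4o-mini")
temperature = float(os.getenv("TEMPERATURE", "0.7"))  # env values arrive as strings
max_tokens = int(os.getenv("MAX_TOKENS", "6000"))
database_url = os.getenv("DATABASE_URL", "sqlite:///chat_database.db")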
.gitattributes ADDED
@@ -0,0 +1,38 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ images/Auto-Analyst[[:space:]]Banner.png filter=lfs diff=lfs merge=lfs -text
+ chat_database.db filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,48 @@
+
+ flask_app/_pycache__
+ flask_app/instance
+ flask_app/*.env
+ *.env
+ __pycache__
+
+ venv/
+
+ .env
+
+ try*
+
+ logs/
+
+ updated_code.py
+ sample_code.py
+
+
+ *.dump
+
+ migrations/
+
+ *.pyc
+ alembic.ini
+
+
+ *.db
+
+ schema*.md
+
+ # agent_config.json
+
+
+ notebooks/
+
+
+
+ testing.ipynb
+ redis_index.json
+ email_to_userid_mapping.json
+ redis_backup_20250906_143859.json
+ "*.db"
+ "*.sqlite"
+ "*.sqlite3"
+ "venv/"
+ "__pycache__/"
+ "*.pyc"
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ FROM python:3.12
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+
+ # Verify agents_config.json was copied (it should be in the backend directory)
+ RUN if [ -f "/app/agents_config.json" ]; then \
+     echo "✅ agents_config.json found in container"; \
+     ls -la /app/agents_config.json; \
+   else \
+     echo "⚠️ agents_config.json not found in container - will use fallback templates"; \
+     ls -la /app/ | grep -E "agents|config" || echo "No config files found"; \
+   fi
+
+ # Make entrypoint script executable
+ USER root
+ RUN chmod +x /app/entrypoint_local.sh
+ # Make populate script executable
+ RUN chmod +x /app/scripts/populate_agent_templates.py
+
+ USER user
+
+ # Use the entrypoint script instead of directly running uvicorn
+ CMD ["/app/entrypoint_local.sh"]
Housing.csv ADDED
@@ -0,0 +1,546 @@
+ price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
+ 13300000,7420,4,2,3,yes,yes,no,no,yes,2,no,semi-furnished
+ 12250000,8960,4,4,4,yes,no,yes,yes,yes,3,yes,furnished
+ 12250000,9960,3,2,2,yes,yes,no,no,yes,2,yes,unfurnished
+ 12215000,7500,4,2,2,yes,yes,no,yes,yes,3,no,furnished
+ 11410000,7420,4,1,2,yes,yes,yes,no,no,2,yes,unfurnished
+ 10850000,7500,3,3,1,yes,no,yes,no,no,2,yes,semi-furnished
+ 10150000,8580,4,3,4,yes,yes,no,no,yes,2,yes,semi-furnished
+ 10150000,16200,5,3,2,yes,yes,yes,no,yes,0,no,unfurnished
+ 9870000,8100,4,1,2,yes,yes,no,no,no,2,no,semi-furnished
+ 9800000,5750,3,2,4,yes,no,yes,yes,yes,1,yes,semi-furnished
+ 9800000,13200,3,1,2,yes,yes,no,yes,yes,2,no,semi-furnished
+ 9681000,6000,4,3,2,yes,yes,no,no,yes,2,yes,semi-furnished
+ 9310000,6550,4,2,2,yes,yes,no,no,no,1,no,unfurnished
+ 9240000,3500,4,2,2,yes,yes,no,yes,yes,2,yes,furnished
+ 9240000,7800,3,2,2,yes,no,no,no,no,0,yes,furnished
+ 9100000,6000,4,1,2,yes,yes,no,no,yes,2,no,semi-furnished
+ 9100000,6600,4,2,2,yes,no,yes,yes,no,1,yes,furnished
+ 8960000,8500,3,2,4,yes,no,yes,no,no,2,no,furnished
+ 8890000,4600,3,2,2,yes,no,yes,no,yes,2,no,semi-furnished
+ 8855000,6420,3,2,2,yes,yes,no,yes,no,1,yes,semi-furnished
+ 8750000,4320,3,1,2,yes,no,no,yes,yes,2,yes,furnished
+ 8680000,7155,3,2,1,yes,yes,no,yes,no,2,no,unfurnished
+ 8645000,8050,3,1,1,yes,no,no,no,yes,1,yes,semi-furnished
+ 8645000,4560,3,2,2,yes,no,yes,yes,yes,1,yes,unfurnished
+ 8575000,8800,3,2,2,yes,no,yes,yes,no,2,yes,furnished
+ 8540000,6540,4,2,2,yes,no,no,yes,yes,2,yes,semi-furnished
+ 8463000,6000,3,2,4,yes,no,yes,yes,yes,0,yes,unfurnished
+ 8400000,8875,3,1,1,yes,no,yes,yes,no,1,no,semi-furnished
+ 8400000,7950,5,2,2,yes,no,no,yes,yes,2,no,furnished
+ 8400000,5500,4,2,2,yes,no,yes,no,yes,1,yes,unfurnished
+ 8400000,7475,3,2,4,yes,yes,no,no,no,2,yes,furnished
+ 8400000,7000,3,1,4,yes,yes,yes,yes,no,2,yes,furnished
+ 8295000,4880,4,2,2,yes,no,no,yes,yes,1,no,semi-furnished
+ 8190000,5960,3,3,2,yes,no,no,yes,yes,1,yes,unfurnished
+ 8120000,6840,5,1,2,yes,no,no,no,no,1,no,furnished
+ 8080940,7000,3,2,4,yes,yes,no,no,no,2,no,furnished
+ 8043000,7482,3,2,3,yes,no,yes,no,yes,1,no,unfurnished
+ 7980000,9000,4,2,4,yes,yes,no,no,no,2,yes,furnished
+ 7962500,6000,3,1,4,yes,yes,no,no,yes,2,no,furnished
+ 7910000,6000,4,2,4,yes,yes,no,no,yes,1,yes,unfurnished
+ 7875000,6550,3,1,2,yes,yes,yes,no,yes,0,no,unfurnished
+ 7840000,6360,3,2,4,yes,yes,no,no,yes,0,yes,unfurnished
+ 7700000,6480,3,2,4,yes,no,yes,no,yes,2,no,unfurnished
+ 7700000,6000,4,2,4,yes,no,no,yes,no,2,yes,semi-furnished
+ 7560000,6000,4,2,4,yes,no,yes,yes,no,1,no,unfurnished
+ 7560000,6000,3,2,3,yes,no,yes,yes,no,0,yes,semi-furnished
+ 7525000,6000,3,2,4,yes,no,no,yes,no,1,yes,semi-furnished
+ 7490000,6600,3,1,4,yes,yes,yes,no,yes,3,yes,semi-furnished
+ 7455000,4300,3,2,2,yes,no,no,no,yes,1,yes,furnished
+ 7420000,7440,3,2,1,yes,no,no,yes,yes,0,no,semi-furnished
+ 7420000,7440,3,2,4,yes,yes,no,no,yes,1,no,semi-furnished
+ 7420000,6325,3,1,4,yes,no,yes,yes,no,1,yes,furnished
+ 7350000,6000,4,2,4,yes,yes,yes,no,yes,1,no,furnished
+ 7350000,5150,3,2,4,yes,no,no,yes,yes,2,yes,furnished
+ 7350000,6000,3,2,2,yes,yes,no,no,no,1,yes,unfurnished
+ 7350000,6000,3,1,2,yes,no,yes,no,no,1,no,furnished
+ 7343000,11440,4,1,2,yes,no,no,yes,no,1,yes,furnished
+ 7245000,9000,4,2,4,yes,yes,no,no,no,1,yes,semi-furnished
+ 7210000,7680,4,2,4,yes,yes,no,yes,yes,1,no,furnished
+ 7210000,6000,3,2,4,yes,yes,yes,yes,yes,1,yes,unfurnished
+ 7140000,6000,3,2,2,yes,yes,yes,yes,no,1,yes,furnished
+ 7070000,8880,2,1,1,yes,yes,yes,yes,yes,1,yes,unfurnished
+ 7070000,6240,4,2,2,yes,yes,no,no,yes,1,yes,furnished
+ 7035000,6360,4,2,3,yes,yes,no,no,yes,2,no,furnished
+ 7000000,11175,3,1,1,yes,yes,no,no,no,1,no,furnished
+ 6930000,8880,3,2,2,yes,no,yes,yes,yes,1,no,furnished
+ 6930000,13200,2,1,1,yes,no,no,no,no,1,yes,unfurnished
+ 6895000,7700,3,2,1,yes,yes,no,yes,yes,2,yes,unfurnished
+ 6860000,6000,3,1,1,yes,no,no,no,yes,1,no,semi-furnished
+ 6790000,12090,4,2,2,yes,no,no,no,yes,2,yes,unfurnished
+ 6790000,4000,3,2,2,yes,no,yes,no,no,0,yes,furnished
+ 6755000,6000,4,2,4,yes,no,yes,yes,yes,0,yes,semi-furnished
+ 6720000,5020,3,1,4,yes,yes,no,no,no,0,yes,semi-furnished
+ 6685000,6600,2,2,4,yes,no,yes,no,no,0,no,semi-furnished
+ 6650000,4040,3,1,2,yes,yes,yes,yes,no,1,no,furnished
+ 6650000,4260,4,2,2,yes,no,no,no,yes,0,no,unfurnished
+ 6650000,6420,3,2,3,yes,no,no,yes,yes,0,yes,furnished
+ 6650000,6500,3,2,3,yes,no,yes,no,yes,0,no,semi-furnished
+ 6650000,5700,3,1,1,yes,yes,no,yes,yes,2,no,furnished
+ 6650000,6000,3,2,3,yes,no,yes,yes,no,0,yes,furnished
+ 6629000,6000,3,1,2,yes,yes,no,no,no,1,yes,unfurnished
+ 6615000,4000,3,2,2,yes,no,yes,yes,no,1,no,unfurnished
+ 6615000,10500,3,2,1,yes,yes,no,yes,yes,1,no,semi-furnished
+ 6580000,6000,3,2,4,yes,no,yes,yes,no,0,no,furnished
+ 6510000,3760,3,1,2,yes,yes,yes,yes,yes,2,no,furnished
+ 6510000,8250,3,2,3,yes,yes,yes,yes,no,0,yes,unfurnished
+ 6510000,6670,3,1,3,yes,no,no,yes,yes,0,no,furnished
+ 6475000,3960,3,1,1,yes,yes,yes,no,yes,2,no,furnished
+ 6475000,7410,3,1,1,yes,no,yes,yes,no,2,yes,furnished
+ 6440000,8580,5,3,2,yes,no,no,yes,no,2,no,unfurnished
+ 6440000,5000,3,1,2,yes,no,no,no,no,0,yes,semi-furnished
+ 6419000,6750,2,1,1,yes,no,yes,no,no,2,yes,semi-furnished
+ 6405000,4800,3,2,4,yes,no,yes,no,yes,0,yes,furnished
+ 6300000,7200,3,2,1,yes,no,no,no,no,3,no,semi-furnished
+ 6300000,6000,4,2,4,yes,no,no,no,no,1,no,unfurnished
+ 6300000,4100,3,2,3,yes,no,no,no,no,2,yes,unfurnished
+ 6300000,9000,3,1,1,yes,no,no,no,yes,1,no,semi-furnished
+ 6300000,6400,3,1,1,yes,no,no,yes,yes,1,no,semi-furnished
+ 6293000,6600,3,2,3,yes,no,yes,yes,no,0,yes,furnished
+ 6265000,6000,4,1,3,yes,yes,no,no,yes,0,yes,semi-furnished
+ 6230000,6600,3,2,1,yes,yes,yes,yes,no,0,no,furnished
+ 6230000,5500,3,1,3,yes,no,yes,yes,yes,1,yes,furnished
+ 6195000,5500,3,2,4,yes,no,yes,yes,yes,1,no,unfurnished
+ 6195000,6350,3,2,3,yes,no,no,no,yes,0,no,unfurnished
+ 6195000,5500,3,2,1,yes,no,yes,no,yes,2,no,unfurnished
+ 6160000,4500,3,1,4,yes,no,yes,yes,no,0,no,unfurnished
+ 6160000,5450,4,2,1,yes,no,no,no,no,0,yes,semi-furnished
+ 6125000,6420,3,1,3,yes,no,yes,yes,no,0,yes,unfurnished
+ 6107500,3240,4,1,3,yes,no,yes,yes,yes,1,yes,semi-furnished
+ 6090000,6615,4,2,2,yes,yes,no,yes,no,1,yes,unfurnished
+ 6090000,6600,3,1,1,yes,no,yes,yes,no,2,no,semi-furnished
+ 6090000,8372,3,1,3,yes,yes,yes,no,no,2,no,furnished
+ 6083000,4300,6,2,2,yes,no,yes,no,yes,0,yes,semi-furnished
+ 6083000,9620,3,1,1,yes,no,no,no,yes,2,no,semi-furnished
+ 6020000,6800,2,1,1,yes,yes,no,yes,yes,2,no,unfurnished
+ 6020000,8000,3,1,1,yes,no,no,no,no,2,no,semi-furnished
+ 6020000,6900,3,2,1,yes,yes,yes,yes,no,0,yes,furnished
+ 5950000,3700,4,1,2,yes,no,yes,no,no,0,yes,furnished
+ 5950000,6420,3,1,1,yes,no,no,yes,yes,0,yes,unfurnished
+ 5950000,7020,3,1,1,yes,yes,yes,yes,yes,2,no,furnished
+ 5950000,6540,3,1,1,yes,no,yes,yes,no,2,yes,semi-furnished
+ 5950000,7231,3,1,2,yes,yes,no,no,yes,0,yes,furnished
+ 5950000,6254,4,2,1,yes,no,no,no,yes,1,yes,unfurnished
+ 5950000,7320,4,2,2,yes,yes,yes,no,yes,0,yes,semi-furnished
+ 5950000,6525,3,2,4,yes,yes,yes,no,yes,1,yes,unfurnished
+ 5943000,15600,3,1,1,yes,no,yes,yes,no,2,yes,semi-furnished
+ 5880000,7160,3,1,1,yes,no,no,no,yes,2,no,furnished
+ 5880000,6500,3,2,3,yes,yes,no,no,yes,0,no,unfurnished
+ 5873000,5500,3,1,3,yes,no,no,yes,yes,1,yes,unfurnished
+ 5873000,11460,3,1,3,yes,no,no,yes,yes,2,yes,semi-furnished
+ 5866000,4800,3,1,1,yes,no,yes,yes,no,0,no,unfurnished
+ 5810000,5828,4,1,4,yes,yes,no,no,no,0,no,semi-furnished
+ 5810000,5200,3,1,3,yes,yes,no,yes,yes,0,yes,semi-furnished
+ 5810000,4800,3,1,3,yes,yes,no,yes,yes,0,yes,semi-furnished
+ 5803000,7000,3,1,1,yes,yes,no,yes,yes,2,yes,semi-furnished
+ 5775000,6000,3,2,4,yes,yes,no,no,yes,0,yes,furnished
+ 5740000,5400,4,2,2,yes,yes,yes,yes,yes,2,no,semi-furnished
+ 5740000,4640,4,1,2,yes,yes,yes,no,yes,1,yes,unfurnished
+ 5740000,5000,3,1,3,yes,yes,no,no,no,0,yes,furnished
+ 5740000,6360,3,1,1,yes,yes,yes,yes,yes,2,yes,unfurnished
+ 5740000,5800,3,2,4,yes,no,no,yes,no,0,yes,furnished
+ 5652500,6660,4,2,2,yes,yes,no,no,no,1,no,semi-furnished
+ 5600000,10500,4,2,2,yes,no,yes,yes,yes,1,no,furnished
+ 5600000,4800,5,2,3,no,no,yes,yes,no,0,no,furnished
+ 5600000,4700,4,1,2,yes,no,no,no,no,1,yes,unfurnished
+ 5600000,5000,3,1,4,yes,yes,no,no,no,0,yes,unfurnished
+ 5600000,10500,2,1,1,yes,yes,no,yes,no,1,no,semi-furnished
+ 5600000,5500,3,2,2,yes,yes,yes,no,no,1,yes,furnished
+ 5600000,6360,3,1,3,yes,yes,no,yes,no,0,yes,furnished
+ 5600000,6600,4,2,1,yes,no,no,yes,yes,0,no,furnished
+ 5600000,5136,3,1,2,yes,yes,no,yes,no,0,no,semi-furnished
+ 5565000,4400,4,1,2,yes,yes,yes,no,no,2,no,semi-furnished
+ 5565000,5400,5,1,2,yes,yes,no,yes,no,0,no,semi-furnished
+ 5530000,3300,3,3,2,yes,yes,no,yes,yes,0,yes,semi-furnished
+ 5530000,3650,3,2,2,yes,yes,yes,yes,no,2,yes,semi-furnished
+ 5530000,6100,3,2,1,yes,no,no,no,yes,2,no,unfurnished
+ 5523000,6900,3,1,1,yes,yes,no,no,no,0,yes,furnished
+ 5495000,2817,4,2,2,no,no,yes,no,yes,1,no,furnished
+ 5495000,7980,3,1,1,yes,yes,no,no,yes,2,no,furnished
+ 5460000,3150,3,2,1,yes,no,yes,no,no,0,yes,furnished
+ 5460000,6210,4,1,4,yes,yes,yes,yes,no,0,no,furnished
+ 5460000,6100,3,1,3,yes,yes,no,no,yes,0,yes,furnished
+ 5460000,6600,4,2,2,yes,no,no,no,no,0,yes,furnished
+ 5425000,6825,3,1,1,yes,no,no,no,yes,0,yes,furnished
+ 5390000,6710,3,2,2,yes,no,no,yes,yes,1,yes,unfurnished
+ 5383000,6450,3,2,1,yes,yes,yes,no,yes,0,no,furnished
+ 5320000,7800,3,1,1,yes,no,no,no,yes,2,yes,unfurnished
+ 5285000,4600,2,2,1,yes,yes,yes,yes,yes,2,no,semi-furnished
+ 5250000,4260,4,1,2,yes,yes,no,no,no,0,no,furnished
+ 5250000,6540,4,2,2,no,no,no,yes,yes,0,no,semi-furnished
+ 5250000,5500,3,2,1,yes,no,no,yes,yes,0,no,semi-furnished
+ 5250000,10269,3,1,1,yes,yes,yes,yes,no,1,yes,semi-furnished
+ 5250000,8400,3,1,2,yes,yes,no,yes,yes,2,yes,unfurnished
+ 5250000,5300,4,2,1,yes,no,no,yes,no,0,no,semi-furnished
+ 5250000,3800,3,1,2,yes,no,no,no,no,1,no,semi-furnished
+ 5250000,9800,4,2,2,yes,no,no,yes,no,2,yes,semi-furnished
+ 5250000,8520,3,1,1,yes,yes,yes,yes,yes,2,no,furnished
+ 5243000,6050,3,1,1,yes,yes,no,no,yes,0,yes,furnished
+ 5229000,7085,3,1,1,yes,yes,yes,yes,yes,2,no,unfurnished
+ 5215000,3180,3,2,2,yes,yes,yes,no,no,2,yes,unfurnished
+ 5215000,4500,4,2,1,no,yes,yes,no,yes,2,no,furnished
+ 5215000,7200,3,1,2,yes,yes,yes,no,no,1,no,unfurnished
+ 5145000,3410,3,1,2,no,no,yes,yes,yes,0,yes,furnished
+ 5145000,7980,3,1,1,yes,yes,yes,yes,no,1,no,semi-furnished
+ 5110000,3000,3,2,2,yes,yes,no,yes,yes,0,no,semi-furnished
+ 5110000,3000,3,1,2,yes,yes,yes,no,yes,0,yes,unfurnished
+ 5110000,11410,2,1,2,yes,no,yes,yes,yes,0,no,furnished
+ 5110000,6100,3,1,1,yes,yes,yes,yes,no,0,yes,furnished
+ 5075000,5720,2,1,2,yes,yes,no,yes,no,0,yes,unfurnished
+ 5040000,3540,2,1,1,no,no,no,no,yes,0,no,furnished
+ 5040000,7600,4,1,2,yes,yes,yes,no,yes,2,no,semi-furnished
+ 5040000,10700,3,1,2,yes,yes,no,yes,no,0,no,unfurnished
+ 5040000,6600,3,1,1,yes,yes,yes,yes,no,0,no,unfurnished
+ 5033000,4800,2,1,1,yes,yes,yes,yes,no,0,no,furnished
+ 5005000,8150,3,2,1,yes,yes,no,yes,yes,0,no,unfurnished
+ 4970000,4410,4,3,2,yes,no,yes,no,no,2,no,furnished
+ 4970000,7686,3,1,1,yes,no,no,no,yes,0,yes,furnished
+ 4956000,2800,3,2,2,no,no,no,no,no,1,no,furnished
+ 4935000,5948,3,1,2,yes,yes,yes,no,yes,0,yes,furnished
+ 4907000,4200,3,1,2,yes,yes,yes,yes,no,1,no,furnished
+ 4900000,4520,3,1,2,yes,yes,no,yes,no,0,no,unfurnished
+ 4900000,4095,3,1,2,no,no,yes,yes,no,0,yes,semi-furnished
+ 4900000,4120,2,1,1,yes,yes,yes,no,no,1,no,semi-furnished
+ 4900000,5400,4,1,2,yes,yes,yes,yes,no,0,no,unfurnished
+ 4900000,4770,3,1,1,yes,no,no,no,yes,0,yes,unfurnished
+ 4900000,6300,3,1,1,yes,yes,no,no,no,2,no,semi-furnished
+ 4900000,5800,2,1,1,yes,no,yes,yes,no,0,no,semi-furnished
+ 4900000,3000,3,1,2,yes,no,no,yes,no,0,no,semi-furnished
+ 4900000,2970,3,1,3,yes,no,no,yes,yes,0,yes,semi-furnished
+ 4900000,6720,3,1,1,yes,yes,no,no,no,0,no,semi-furnished
+ 4900000,4646,3,1,2,yes,yes,no,yes,yes,2,no,furnished
+ 4900000,12900,3,1,1,yes,yes,no,no,no,2,no,unfurnished
+ 4893000,3420,4,2,2,yes,yes,yes,yes,no,2,no,furnished
+ 4893000,4995,4,2,1,yes,yes,yes,no,yes,0,yes,semi-furnished
+ 4865000,4350,2,1,1,yes,yes,yes,no,no,0,yes,semi-furnished
+ 4830000,4160,3,1,3,yes,yes,no,yes,yes,0,no,semi-furnished
+ 4830000,6040,3,1,1,yes,no,no,no,yes,2,no,furnished
+ 4830000,6862,3,1,2,yes,yes,yes,no,no,2,yes,furnished
+ 4830000,4815,2,1,1,yes,no,no,yes,yes,0,yes,semi-furnished
+ 4795000,7000,3,1,2,yes,yes,yes,no,no,0,no,furnished
+ 4795000,8100,4,1,4,yes,yes,yes,yes,no,2,yes,furnished
+ 4767000,3420,4,2,2,yes,yes,yes,no,yes,0,no,semi-furnished
+ 4760000,9166,2,1,1,yes,no,no,yes,yes,2,yes,furnished
+ 4760000,6321,3,1,2,yes,no,no,yes,yes,1,yes,unfurnished
+ 4760000,10240,2,1,1,yes,no,yes,no,yes,2,yes,unfurnished
+ 4753000,6440,2,1,1,yes,no,yes,no,no,3,yes,unfurnished
+ 4690000,5170,3,1,4,yes,yes,yes,no,no,0,no,unfurnished
+ 4690000,6000,2,1,1,yes,no,yes,no,yes,1,yes,semi-furnished
+ 4690000,3630,3,1,2,yes,yes,no,no,no,2,yes,semi-furnished
+ 4690000,9667,4,2,2,yes,yes,no,yes,no,1,no,semi-furnished
+ 4690000,5400,2,1,2,yes,no,yes,no,no,0,yes,unfurnished
+ 4690000,4320,3,1,1,yes,no,yes,yes,no,0,yes,unfurnished
+ 4655000,3745,3,1,2,yes,no,no,yes,yes,0,no,furnished
+ 4620000,4160,3,1,1,yes,no,no,no,yes,0,yes,furnished
+ 4620000,3880,3,2,2,yes,no,yes,yes,no,2,no,unfurnished
+ 4620000,5680,3,1,2,yes,no,yes,yes,yes,1,no,unfurnished
+ 4620000,2870,2,1,2,yes,no,yes,yes,yes,0,no,unfurnished
+ 4620000,5010,3,1,2,yes,no,no,yes,yes,0,no,unfurnished
+ 4613000,4510,4,2,2,yes,yes,no,no,no,0,yes,furnished
+ 4585000,4000,3,1,2,yes,no,no,yes,no,1,no,unfurnished
+ 4585000,3840,3,1,2,yes,no,yes,no,no,1,yes,furnished
+ 4550000,3760,3,1,1,yes,yes,yes,no,yes,2,yes,unfurnished
+ 4550000,3640,3,1,2,yes,no,no,yes,no,0,yes,unfurnished
+ 4550000,2550,3,1,2,yes,yes,no,no,no,0,yes,unfurnished
+ 4550000,5320,3,1,2,yes,yes,yes,no,yes,0,yes,furnished
+ 4550000,5360,3,1,2,yes,no,no,no,no,2,yes,furnished
+ 4550000,3520,3,1,1,yes,yes,no,yes,no,0,yes,semi-furnished
+ 4550000,8400,4,1,4,yes,yes,yes,no,yes,3,yes,unfurnished
+ 4543000,4100,2,2,1,yes,yes,no,no,yes,0,yes,semi-furnished
+ 4543000,4990,4,2,2,yes,yes,yes,no,yes,0,yes,semi-furnished
+ 4515000,3510,3,1,3,yes,no,yes,yes,no,0,no,furnished
+ 4515000,3450,3,1,2,yes,yes,yes,no,no,1,no,furnished
+ 4515000,9860,3,1,1,yes,no,yes,yes,no,0,no,semi-furnished
+ 4515000,3520,2,1,2,yes,yes,yes,yes,yes,0,yes,unfurnished
+ 4480000,4510,4,1,2,yes,yes,yes,yes,yes,2,no,furnished
+ 4480000,5885,2,1,1,yes,yes,yes,yes,no,1,no,unfurnished
+ 4480000,4000,3,1,2,yes,yes,no,yes,no,2,no,unfurnished
+ 4480000,8250,3,1,1,yes,no,yes,yes,yes,0,yes,semi-furnished
+ 4480000,4040,3,1,2,yes,no,yes,no,yes,1,yes,semi-furnished
+ 4473000,6360,2,1,1,yes,yes,no,yes,yes,1,yes,semi-furnished
+ 4473000,3162,3,1,2,yes,yes,no,no,yes,1,yes,semi-furnished
+ 4473000,3510,3,1,2,yes,no,no,no,no,0,no,furnished
+ 4445000,3750,2,1,1,yes,yes,yes,yes,yes,0,no,furnished
+ 4410000,3968,3,1,2,no,yes,no,no,no,0,no,semi-furnished
+ 4410000,4900,2,1,2,yes,yes,yes,no,no,0,no,furnished
+ 4403000,2880,3,1,2,yes,no,yes,no,no,0,yes,semi-furnished
+ 4403000,4880,3,1,1,yes,no,yes,no,yes,2,yes,unfurnished
+ 4403000,4920,3,1,2,yes,no,yes,yes,yes,1,yes,unfurnished
+ 4382000,4950,4,1,2,yes,yes,no,no,yes,0,no,unfurnished
+ 4375000,3900,3,1,2,yes,yes,no,yes,yes,0,yes,semi-furnished
+ 4340000,4500,3,2,3,yes,no,yes,no,no,1,yes,unfurnished
+ 4340000,1905,5,1,2,no,no,yes,no,yes,0,no,furnished
+ 4340000,4075,3,1,1,yes,no,yes,yes,yes,2,no,unfurnished
+ 4340000,3500,4,1,2,yes,no,no,no,no,2,yes,unfurnished
+ 4340000,6450,4,1,2,yes,yes,yes,no,yes,0,yes,unfurnished
+ 4319000,4032,2,1,1,yes,no,yes,no,no,0,yes,furnished
+ 4305000,4400,2,1,1,yes,yes,no,yes,no,1,yes,semi-furnished
+ 4305000,10360,2,1,1,yes,no,no,no,no,1,yes,furnished
+ 4277000,3400,3,1,2,yes,yes,yes,yes,no,2,yes,semi-furnished
+ 4270000,6360,2,1,1,yes,no,no,no,no,0,no,semi-furnished
+ 4270000,6360,2,1,2,yes,no,no,yes,no,0,no,unfurnished
+ 4270000,4500,2,1,1,yes,no,yes,yes,no,2,no,semi-furnished
+ 4270000,2175,3,1,2,no,no,no,no,no,0,yes,furnished
+ 4270000,4360,4,1,2,yes,yes,yes,no,yes,0,no,furnished
+ 4270000,7770,2,1,1,yes,no,no,yes,yes,1,yes,semi-furnished
+ 4235000,6650,3,1,2,yes,yes,yes,yes,yes,0,yes,unfurnished
+ 4235000,2787,3,1,1,yes,yes,yes,no,yes,0,no,unfurnished
+ 4200000,5500,3,1,2,yes,yes,yes,yes,no,0,yes,furnished
+ 4200000,5040,3,1,2,yes,yes,no,no,yes,0,yes,unfurnished
+ 4200000,5850,2,1,1,yes,no,yes,no,no,2,no,semi-furnished
+ 4200000,2610,4,3,2,no,yes,no,no,yes,0,no,semi-furnished
+ 4200000,2953,3,1,2,yes,yes,yes,no,yes,0,no,furnished
+ 4200000,2747,4,2,2,no,yes,yes,yes,yes,0,yes,semi-furnished
+ 4200000,4410,2,1,1,no,no,no,no,no,1,no,furnished
+ 4200000,4000,4,2,2,no,no,yes,yes,yes,0,no,semi-furnished
+ 4200000,2325,3,1,2,no,no,yes,no,no,0,yes,furnished
+ 4200000,4600,3,2,2,yes,no,no,yes,no,1,no,unfurnished
+ 4200000,3640,3,2,2,yes,yes,yes,no,yes,0,no,semi-furnished
+ 4200000,5800,3,1,1,yes,yes,yes,yes,yes,2,no,semi-furnished
+ 4200000,7000,3,1,1,yes,no,yes,yes,yes,3,yes,furnished
+ 4200000,4079,3,1,3,yes,yes,yes,no,no,0,yes,furnished
+ 4200000,3520,3,1,2,yes,yes,no,no,yes,0,yes,semi-furnished
+ 4200000,2145,3,1,3,yes,yes,no,no,no,1,yes,furnished
+ 4200000,4500,3,1,1,yes,no,yes,no,no,0,no,semi-furnished
+ 4193000,8250,3,1,1,yes,no,no,yes,yes,3,yes,unfurnished
+ 4193000,3450,3,1,2,yes,yes,yes,no,yes,1,yes,furnished
+ 4165000,4840,3,1,2,yes,no,no,yes,no,1,no,semi-furnished
+ 4165000,4080,3,1,2,yes,no,yes,yes,no,2,no,unfurnished
+ 4165000,4046,3,1,2,yes,no,yes,no,no,1,no,unfurnished
+ 4130000,4632,4,1,2,yes,no,yes,yes,yes,0,yes,semi-furnished
+ 4130000,5985,3,1,1,yes,no,yes,yes,no,0,yes,furnished
+ 4123000,6060,2,1,1,yes,yes,yes,yes,yes,1,yes,semi-furnished
+ 4098500,3600,3,1,1,yes,no,yes,yes,no,0,no,unfurnished
+ 4095000,3680,3,2,2,yes,yes,no,no,no,0,no,semi-furnished
+ 4095000,4040,2,1,2,yes,yes,yes,no,yes,1,yes,furnished
+ 4095000,5600,2,1,1,yes,no,no,no,no,0,no,unfurnished
+ 4060000,5900,4,2,2,no,yes,yes,no,yes,1,no,semi-furnished
+ 4060000,4992,3,2,2,yes,yes,no,yes,yes,2,no,furnished
+ 4060000,4340,3,1,1,yes,yes,no,yes,no,0,yes,unfurnished
+ 4060000,3000,4,1,3,yes,no,no,no,yes,2,no,furnished
+ 4060000,4320,3,1,2,yes,no,yes,no,yes,2,no,unfurnished
+ 4025000,3630,3,2,2,yes,yes,yes,no,no,2,yes,semi-furnished
+ 4025000,3460,3,2,1,yes,no,no,yes,yes,1,yes,furnished
+ 4025000,5400,3,1,1,yes,no,yes,no,yes,3,no,unfurnished
+ 4007500,4500,3,1,2,no,no,no,no,yes,0,no,furnished
+ 4007500,3460,4,1,2,yes,no,yes,no,no,0,yes,furnished
+ 3990000,4100,4,1,1,no,yes,no,yes,yes,0,yes,semi-furnished
+ 3990000,6480,3,1,2,no,yes,no,yes,no,1,no,furnished
+ 3990000,4500,3,2,2,no,no,yes,yes,no,0,no,unfurnished
+ 3990000,3960,3,1,2,yes,no,no,yes,yes,0,no,unfurnished
+ 3990000,4050,2,1,2,yes,yes,yes,yes,yes,0,no,unfurnished
+ 3920000,7260,3,2,1,yes,yes,no,yes,yes,3,yes,unfurnished
+ 3920000,5500,4,1,2,yes,no,no,yes,no,0,no,semi-furnished
+ 3920000,3000,3,1,2,yes,yes,yes,yes,yes,0,no,furnished
+ 3920000,3290,2,1,1,yes,no,yes,no,yes,1,yes,semi-furnished
+ 3920000,3816,2,1,1,yes,no,yes,no,yes,2,no,furnished
+ 3920000,8080,3,1,1,yes,yes,no,yes,yes,2,yes,furnished
+ 3920000,2145,4,2,1,yes,yes,yes,yes,yes,0,no,furnished
+ 3885000,3780,2,1,2,yes,no,no,no,no,0,no,semi-furnished
+ 3885000,3180,4,2,2,yes,no,no,yes,no,0,yes,furnished
+ 3850000,5300,5,2,2,yes,yes,yes,yes,yes,0,yes,semi-furnished
+ 3850000,3180,2,2,1,yes,no,yes,yes,yes,2,no,unfurnished
+ 3850000,7152,3,1,2,yes,yes,no,no,yes,0,no,unfurnished
+ 3850000,4080,2,1,1,yes,no,no,no,yes,0,no,furnished
+ 3850000,3850,2,1,1,yes,yes,yes,no,no,0,yes,semi-furnished
+ 3850000,2015,3,1,2,yes,yes,yes,no,yes,0,no,unfurnished
+ 3850000,2176,2,1,2,yes,yes,yes,no,no,0,yes,semi-furnished
+ 3836000,3350,3,1,2,yes,no,yes,yes,no,0,yes,semi-furnished
+ 3815000,3150,2,2,1,no,no,yes,no,yes,0,no,furnished
+ 3780000,4820,3,1,2,yes,yes,no,yes,no,0,yes,unfurnished
+ 3780000,3420,2,1,2,yes,no,no,yes,no,1,no,furnished
+ 3780000,3600,2,1,1,yes,yes,no,no,no,0,no,furnished
+ 3780000,5830,2,1,1,yes,yes,no,no,no,2,no,unfurnished
+ 3780000,2856,3,1,3,yes,no,yes,yes,yes,0,no,unfurnished
+ 3780000,8400,2,1,1,yes,no,yes,yes,yes,1,no,semi-furnished
+ 3773000,8250,3,1,1,yes,yes,yes,yes,no,2,yes,unfurnished
+ 3773000,2520,5,2,1,no,no,no,no,yes,1,yes,semi-furnished
+ 3773000,6930,4,1,2,no,no,no,no,yes,1,no,semi-furnished
+ 3745000,3480,2,1,1,yes,yes,yes,no,yes,0,no,semi-furnished
+ 3710000,3600,3,1,1,yes,yes,yes,no,yes,1,yes,unfurnished
+ 3710000,4040,2,1,1,yes,no,no,no,yes,0,yes,furnished
+ 3710000,6020,3,1,1,yes,yes,no,yes,no,0,yes,semi-furnished
+ 3710000,4050,2,1,1,yes,yes,yes,no,yes,0,yes,furnished
+ 3710000,3584,2,1,1,yes,no,yes,yes,yes,0,no,furnished
+ 3703000,3120,3,1,2,no,yes,no,yes,yes,0,no,unfurnished
+ 3703000,5450,2,1,1,yes,yes,yes,yes,yes,0,yes,furnished
+ 3675000,3630,2,1,1,yes,no,yes,no,no,0,no,furnished
+ 3675000,3630,2,1,1,yes,no,yes,no,no,0,yes,unfurnished
+ 3675000,5640,2,1,1,no,yes,no,no,no,0,no,semi-furnished
+ 3675000,3600,2,1,1,yes,yes,no,yes,no,0,yes,furnished
+ 3640000,4280,2,1,1,yes,no,no,yes,yes,2,no,unfurnished
+ 3640000,3570,3,1,2,yes,no,no,yes,no,0,yes,furnished
+ 3640000,3180,3,1,2,no,no,no,yes,yes,0,yes,furnished
+ 3640000,3000,2,1,2,yes,no,no,yes,yes,0,no,semi-furnished
+ 3640000,3520,2,2,1,yes,yes,no,yes,yes,0,yes,furnished
+ 3640000,5960,3,1,2,yes,no,yes,no,yes,0,no,unfurnished
+ 3640000,4130,3,2,2,yes,no,yes,no,yes,2,no,furnished
+ 3640000,2850,3,2,2,no,yes,yes,no,no,0,yes,semi-furnished
+ 3640000,2275,3,1,3,yes,no,yes,no,no,0,no,semi-furnished
+ 3633000,3520,3,1,1,yes,yes,yes,yes,no,2,no,unfurnished
+ 3605000,4500,2,1,1,yes,no,yes,yes,no,0,no,semi-furnished
+ 3605000,4000,2,1,1,yes,no,yes,no,yes,0,no,furnished
+ 3570000,3150,3,1,2,yes,no,no,yes,no,0,no,semi-furnished
+ 3570000,4500,4,2,2,yes,yes,no,no,yes,2,yes,unfurnished
+ 3570000,4500,2,1,1,no,yes,yes,no,no,0,no,semi-furnished
+ 3570000,3640,2,1,1,yes,no,yes,no,yes,0,no,semi-furnished
+ 3535000,3850,3,1,1,yes,yes,no,yes,yes,2,no,semi-furnished
+ 3500000,4240,3,1,2,yes,yes,yes,no,no,0,no,unfurnished
+ 3500000,3650,3,1,2,yes,yes,yes,no,yes,0,no,semi-furnished
+ 3500000,4600,4,1,2,yes,yes,no,no,yes,0,yes,semi-furnished
+ 3500000,2135,3,2,2,no,yes,yes,yes,no,0,no,furnished
+ 3500000,3036,3,1,2,yes,yes,no,yes,yes,0,yes,unfurnished
+ 3500000,3990,3,1,2,yes,no,yes,no,no,0,yes,semi-furnished
+ 3500000,7424,3,1,1,no,no,no,yes,no,0,yes,unfurnished
+ 3500000,3480,3,1,1,no,yes,no,yes,no,0,yes,semi-furnished
+ 3500000,3600,6,1,2,yes,no,no,yes,yes,1,no,semi-furnished
+ 3500000,3640,2,1,1,yes,no,yes,yes,yes,1,yes,semi-furnished
+ 3500000,5900,2,1,1,yes,no,no,yes,yes,1,yes,furnished
+ 3500000,3120,3,1,2,yes,no,yes,yes,no,1,no,furnished
+ 3500000,7350,2,1,1,yes,yes,yes,yes,yes,1,no,furnished
+ 3500000,3512,2,1,1,yes,no,yes,yes,no,1,no,unfurnished
+ 3500000,9500,3,1,2,yes,no,yes,yes,no,3,yes,unfurnished
+ 3500000,5880,2,1,1,yes,yes,no,yes,yes,0,no,furnished
+ 3500000,12944,3,1,1,yes,yes,no,no,no,0,no,unfurnished
+ 3493000,4900,3,1,2,no,no,yes,no,yes,0,yes,semi-furnished
+ 3465000,3060,3,1,1,yes,no,yes,yes,yes,0,no,semi-furnished
+ 3465000,5320,2,1,1,yes,no,no,no,yes,1,no,semi-furnished
+ 3465000,2145,3,1,3,yes,no,yes,no,yes,0,no,furnished
+ 3430000,4000,2,1,1,yes,no,no,yes,yes,0,no,unfurnished
+ 3430000,3185,2,1,1,yes,no,yes,yes,no,2,yes,furnished
+ 3430000,3850,3,1,1,yes,yes,yes,yes,no,0,no,unfurnished
+ 3430000,2145,3,1,3,yes,no,no,no,no,0,no,semi-furnished
+ 3430000,2610,3,1,2,yes,no,yes,no,yes,0,yes,furnished
+ 3430000,1950,3,2,2,yes,yes,no,yes,yes,0,no,unfurnished
+ 3423000,4040,2,1,1,yes,yes,yes,yes,yes,0,yes,unfurnished
+ 3395000,4785,3,1,2,yes,yes,yes,yes,no,1,no,semi-furnished
+ 3395000,3450,3,1,1,yes,yes,yes,no,yes,2,yes,furnished
+ 3395000,3640,2,1,1,yes,no,no,no,no,0,yes,furnished
+ 3360000,3500,4,1,2,yes,no,yes,no,no,2,yes,furnished
+ 3360000,4960,4,1,3,no,no,yes,no,yes,0,yes,semi-furnished
+ 3360000,4120,2,1,2,yes,no,yes,yes,yes,0,no,furnished
+ 3360000,4750,2,1,1,yes,no,yes,yes,no,0,yes,semi-furnished
+ 3360000,3720,2,1,1,no,no,no,yes,yes,0,no,semi-furnished
+ 3360000,3750,3,1,1,yes,yes,no,yes,no,0,yes,semi-furnished
+ 3360000,3100,3,1,2,no,no,yes,no,yes,0,yes,unfurnished
+ 3360000,3185,2,1,1,yes,yes,no,no,yes,2,yes,furnished
+ 3353000,2700,3,1,1,no,yes,yes,yes,no,0,no,furnished
+ 3332000,2145,3,1,2,yes,no,yes,yes,yes,0,yes,semi-furnished
+ 3325000,4040,2,1,1,yes,yes,yes,no,no,1,no,semi-furnished
+ 3325000,4775,4,1,2,yes,no,no,yes,yes,0,no,furnished
+ 3290000,2500,2,1,1,no,yes,no,no,no,0,no,semi-furnished
+ 3290000,3180,4,1,2,yes,no,no,yes,no,0,no,furnished
+ 3290000,6060,3,1,1,yes,yes,no,yes,yes,0,no,unfurnished
+ 3290000,3480,4,1,2,no,no,no,yes,no,1,no,semi-furnished
+ 3290000,3792,4,1,2,yes,no,no,yes,yes,0,no,furnished
+ 3290000,4040,2,1,1,yes,no,no,yes,yes,0,no,semi-furnished
+ 3290000,2145,3,1,2,yes,no,no,yes,yes,0,no,furnished
+ 3290000,5880,3,1,1,yes,no,no,yes,no,1,yes,furnished
+ 3255000,4500,2,1,1,no,yes,yes,no,no,0,yes,semi-furnished
+ 3255000,3930,2,1,1,no,yes,no,no,yes,0,yes,unfurnished
+ 3234000,3640,4,1,2,yes,yes,no,no,yes,0,yes,furnished
+ 3220000,4370,3,1,2,yes,no,no,no,no,0,yes,furnished
+ 3220000,2684,2,1,1,yes,yes,no,no,yes,1,no,furnished
+ 3220000,4320,3,1,1,no,no,no,yes,yes,1,no,unfurnished
+ 3220000,3120,3,1,2,no,no,no,yes,no,0,no,unfurnished
+ 3150000,3450,1,1,1,yes,yes,no,no,yes,0,no,semi-furnished
+ 3150000,3986,2,2,1,no,yes,no,yes,no,1,no,unfurnished
+ 3150000,3500,2,1,1,no,no,no,no,yes,0,no,semi-furnished
+ 3150000,4095,2,1,1,yes,yes,no,no,no,2,no,semi-furnished
+ 3150000,1650,3,1,2,no,no,no,yes,yes,0,yes,unfurnished
+ 3150000,3450,3,1,2,yes,no,yes,yes,yes,0,yes,semi-furnished
+ 3150000,6750,2,1,1,yes,no,yes,yes,no,0,yes,unfurnished
+ 3150000,9000,3,1,2,yes,no,no,no,yes,2,yes,semi-furnished
+ 3150000,3069,2,1,1,yes,no,no,yes,no,1,no,unfurnished
+ 3143000,4500,3,1,2,yes,yes,yes,no,yes,0,no,unfurnished
+ 3129000,5495,3,1,1,yes,yes,no,no,no,0,no,semi-furnished
+ 3118850,2398,3,1,1,yes,no,yes,no,no,0,yes,furnished
+ 3115000,3000,3,1,1,no,no,yes,yes,no,0,no,semi-furnished
+ 3115000,3850,3,1,2,yes,yes,yes,yes,no,0,no,unfurnished
+ 3115000,3500,2,1,1,yes,yes,yes,no,yes,0,no,furnished
+ 3087000,8100,2,1,1,yes,no,yes,yes,yes,1,yes,semi-furnished
+ 3080000,4960,2,1,1,yes,yes,no,yes,no,0,no,semi-furnished
+ 3080000,2160,3,1,2,no,no,yes,yes,no,0,yes,semi-furnished
+ 3080000,3090,2,1,1,yes,yes,no,no,no,0,yes,furnished
+ 3080000,4500,2,1,2,yes,no,no,no,yes,1,yes,unfurnished
+ 3045000,3800,2,1,1,yes,yes,yes,yes,yes,0,yes,semi-furnished
+ 3010000,3090,3,1,2,no,no,yes,no,yes,0,no,semi-furnished
+ 3010000,3240,3,1,2,yes,yes,no,yes,yes,2,yes,semi-furnished
+ 3010000,2835,2,1,1,yes,yes,yes,yes,no,0,yes,semi-furnished
+ 3010000,4600,2,1,1,yes,no,yes,yes,no,0,no,semi-furnished
+ 3010000,5076,3,1,1,no,yes,no,yes,no,0,no,unfurnished
+ 3010000,3750,3,1,2,yes,no,no,no,yes,0,yes,unfurnished
+ 3010000,3630,4,1,2,yes,yes,yes,no,yes,3,no,semi-furnished
+ 3003000,8050,2,1,1,yes,no,no,no,yes,0,yes,furnished
+ 2975000,4352,4,1,2,no,yes,yes,yes,no,1,yes,unfurnished
+ 2961000,3000,2,1,2,yes,no,yes,yes,no,0,yes,furnished
+ 2940000,5850,3,1,2,yes,yes,yes,yes,yes,1,yes,unfurnished
+ 2940000,4960,2,1,1,yes,no,no,no,no,0,no,semi-furnished
+ 2940000,3600,3,1,2,no,yes,no,yes,yes,1,yes,unfurnished
+ 2940000,3660,4,1,2,no,no,no,no,yes,0,no,semi-furnished
+ 2940000,3480,3,1,2,no,no,no,no,no,1,no,unfurnished
+ 2940000,2700,2,1,1,no,no,no,no,yes,0,no,semi-furnished
+ 2940000,3150,3,1,2,no,yes,yes,no,no,0,no,semi-furnished
+ 2940000,6615,3,1,2,yes,no,yes,no,yes,0,yes,semi-furnished
+ 2870000,3040,2,1,1,no,yes,yes,no,no,0,no,semi-furnished
+ 2870000,3630,2,1,1,yes,no,yes,yes,no,0,yes,furnished
+ 2870000,6000,2,1,1,yes,yes,no,yes,no,0,no,semi-furnished
+ 2870000,5400,4,1,2,yes,no,yes,no,yes,0,no,furnished
+ 2852500,5200,4,1,3,yes,yes,no,no,yes,0,yes,furnished
+ 2835000,3300,3,1,2,no,yes,yes,yes,no,1,yes,semi-furnished
+ 2835000,4350,3,1,2,no,no,no,no,no,1,yes,furnished
+ 2835000,2640,2,1,1,no,yes,no,no,yes,1,no,unfurnished
+ 2800000,2650,3,1,2,yes,yes,yes,yes,yes,1,no,semi-furnished
+ 2800000,3960,3,1,1,yes,no,yes,no,yes,0,yes,furnished
+ 2730000,6800,2,1,1,yes,yes,yes,no,no,0,yes,semi-furnished
+ 2730000,4000,3,1,2,yes,yes,yes,no,yes,1,yes,semi-furnished
+ 2695000,4000,2,1,1,yes,yes,no,yes,yes,0,yes,semi-furnished
+ 2660000,3934,2,1,1,yes,no,yes,yes,no,0,no,furnished
+ 2660000,2000,2,1,2,yes,yes,yes,yes,yes,0,yes,unfurnished
+ 2660000,3630,3,3,2,no,no,no,no,yes,0,yes,unfurnished
+ 2660000,2800,3,1,1,yes,yes,yes,no,yes,0,yes,furnished
+ 2660000,2430,3,1,1,no,yes,yes,yes,no,0,yes,semi-furnished
+ 2660000,3480,2,1,1,yes,yes,yes,no,no,1,no,furnished
+ 2660000,4000,3,1,1,yes,no,yes,no,no,0,yes,furnished
+ 2653000,3185,2,1,1,yes,yes,yes,no,no,0,no,furnished
+ 2653000,4000,3,1,2,yes,yes,no,no,no,0,yes,semi-furnished
+ 2604000,2910,2,1,1,no,no,yes,no,no,0,yes,furnished
+ 2590000,3600,2,1,1,yes,yes,yes,yes,no,0,no,semi-furnished
+ 2590000,4400,2,1,1,yes,no,yes,no,yes,0,yes,furnished
+ 2590000,3600,2,2,2,yes,no,yes,yes,yes,1,no,unfurnished
+ 2520000,2880,3,1,1,no,yes,yes,no,no,0,no,furnished
+ 2520000,3180,3,1,1,no,no,no,yes,yes,0,yes,furnished
+ 2520000,3000,2,1,2,yes,yes,yes,no,yes,0,no,unfurnished
+ 2485000,4400,3,1,2,yes,no,no,yes,no,0,no,semi-furnished
+ 2485000,3000,3,1,2,no,no,no,no,yes,0,no,semi-furnished
+ 2450000,3210,3,1,2,yes,yes,yes,yes,yes,0,no,furnished
+ 2450000,3240,2,1,1,no,no,yes,no,yes,1,no,unfurnished
+ 2450000,3000,2,1,1,yes,no,yes,yes,yes,1,no,semi-furnished
+ 2450000,3500,2,1,1,yes,no,yes,no,yes,0,no,furnished
+ 2450000,4840,2,1,2,yes,no,no,yes,no,0,no,unfurnished
+ 2450000,7700,2,1,1,yes,no,yes,yes,no,0,yes,semi-furnished
+ 2408000,3635,2,1,1,no,yes,yes,no,no,0,yes,semi-furnished
+ 2380000,2475,3,1,2,yes,no,yes,no,yes,0,no,unfurnished
+ 2380000,2787,4,2,2,yes,yes,yes,yes,no,0,yes,unfurnished
+ 2380000,3264,2,1,1,yes,no,yes,no,yes,0,no,unfurnished
+ 2345000,3640,2,1,1,yes,yes,yes,yes,yes,0,no,unfurnished
+ 2310000,3180,2,1,1,yes,yes,no,yes,yes,0,yes,furnished
+ 2275000,1836,2,1,1,no,no,no,yes,yes,0,no,unfurnished
+ 2275000,3970,1,1,1,no,no,yes,no,no,0,no,semi-furnished
+ 2275000,3970,3,1,2,yes,no,yes,yes,no,0,no,semi-furnished
+ 2240000,1950,3,1,1,no,yes,yes,yes,yes,0,no,furnished
+ 2233000,5300,3,1,1,no,no,no,no,no,0,yes,semi-furnished
+ 2135000,3000,2,1,1,no,yes,yes,no,yes,0,no,semi-furnished
+ 2100000,2400,3,1,2,yes,yes,no,no,no,0,no,unfurnished
+ 2100000,3000,4,1,2,yes,yes,yes,no,no,0,no,semi-furnished
+ 2100000,3360,2,1,1,yes,no,yes,yes,yes,1,yes,furnished
+ 1960000,3420,5,1,2,no,no,yes,yes,no,0,no,unfurnished
+ 1890000,1700,3,1,2,yes,yes,yes,yes,yes,0,yes,unfurnished
+ 1890000,3649,2,1,1,yes,yes,no,yes,no,0,yes,furnished
+ 1855000,2990,2,1,1,no,no,no,no,yes,1,no,unfurnished
+ 1820000,3000,2,1,1,yes,yes,yes,yes,yes,2,no,unfurnished
+ 1767150,2400,3,1,1,no,no,yes,yes,no,0,yes,furnished
+ 1750000,3620,2,1,1,yes,yes,no,no,no,0,no,unfurnished
+ 1750000,2910,3,1,1,no,no,no,yes,yes,0,yes,furnished
+ 1750000,3850,3,1,2,yes,yes,no,yes,no,0,yes,furnished
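As a quick sanity check, the dataset loads directly with pandas (an illustrative sketch, not code from this commit):

import pandas as pd

df = pd.read_csv("Housing.csv")
print(df.shape)                # expected (545, 13): 545 listings, 13 columns
print(df.dtypes)               # price/area/bedrooms/... numeric; mainroad/basement/... yes-no strings
print(df["price"].describe())  # summary statistics of the target column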
Procfile ADDED
@@ -0,0 +1 @@
+ web: (python scripts/init_production_db.py || echo "DB init failed") && (python scripts/populate_agent_templates.py || echo "Template init failed") && uvicorn app:app --host 0.0.0.0 --port $PORT
README.md ADDED
@@ -0,0 +1,10 @@
+ ---
+ title: Auto Analyst Backend
+ emoji: 🦀
+ colorFrom: green
+ colorTo: indigo
+ sdk: docker
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents_config.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "templates": [
3
+ {
4
+ "template_name": "preprocessing_agent",
5
+ "display_name": "Data Preprocessing Agent",
6
+ "description": "Cleans and prepares a DataFrame using Pandas and NumPy—handles missing values, detects column types, and converts date strings to datetime",
7
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
8
+ "category": "Data Manipulation",
9
+ "is_premium_only": false,
10
+ "variant_type": "individual",
11
+ "base_agent": "preprocessing_agent",
12
+ "is_active": true,
13
+ "prompt_template": "You are a AI data-preprocessing agent. Generate clean and efficient Python code using NumPy and Pandas to perform introductory data preprocessing on a pre-loaded DataFrame df, based on the user's analysis goals.\nPreprocessing Requirements:\n1. Identify Column Types\n- Separate columns into numeric and categorical using:\n categorical_columns = df.select_dtypes(include=[object, 'category']).columns.tolist()\n numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()\n2. Handle Missing Values\n- Numeric columns: Impute missing values using the mean of each column\n- Categorical columns: Impute missing values using the mode of each column\n3. Convert Date Strings to Datetime\n- For any column suspected to represent dates (in string format), convert it to datetime using:\n def safe_to_datetime(date):\n try:\n return pd.to_datetime(date, errors='coerce', cache=False)\n except (ValueError, TypeError):\n return pd.NaT\n df['datetime_column'] = df['datetime_column'].apply(safe_to_datetime)\n- Replace 'datetime_column' with the actual column names containing date-like strings\nImportant Notes:\n- Do NOT create a correlation matrix — correlation analysis is outside the scope of preprocessing\n- Do NOT generate any plots or visualizations\nOutput Instructions:\n1. Include the full preprocessing Python code\n2. Provide a brief bullet-point summary of the steps performed. Example:\n• Identified 5 numeric and 4 categorical columns\n• Filled missing numeric values with column means\n• Filled missing categorical values with column modes\n• Converted 1 date column to datetime format\n Respond in the user's language for all summary and reasoning but keep the code in english"
14
+ },
15
+ {
16
+ "template_name": "planner_preprocessing_agent",
17
+ "display_name": "Data Preprocessing Agent",
18
+ "description": "Multi-agent planner variant: Cleans and prepares a DataFrame using Pandas and NumPy—handles missing values, detects column types, and converts date strings to datetime",
19
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
20
+ "category": "Data Manipulation",
21
+ "is_premium_only": false,
22
+ "variant_type": "planner",
23
+ "base_agent": "preprocessing_agent",
24
+ "is_active": true,
25
+ "prompt_template": "You are a data preprocessing agent optimized for multi-agent data analytics pipelines.\n\nYou are given:\n* A raw dataset (often just uploaded or loaded).\n* A user-defined goal (e.g., clean data for analysis, prepare for modeling).\n***plan_instructions** containing:\n ***'create'**: Variables you must create (e.g., ['df_cleaned', 'preprocessing_summary', 'column_types'])\n ***'use'**: Variables you must use (e.g., ['df', 'raw_data'])\n * **'instruction'**: Specific preprocessing instructions\n\n### Your Planner-Optimized Responsibilities:\n* **ALWAYS follow plan_instructions** - essential for pipeline data flow\n* Create ONLY the variables specified in plan_instructions['create']\n* Use ONLY the variables specified in plan_instructions['use']\n* Apply preprocessing as per plan_instructions['instruction']\n* Ensure cleaned data integrates seamlessly with downstream agents\n\n### Core Preprocessing Techniques:\n* Identify and categorize column types (numeric, categorical, datetime)\n* Handle missing values appropriately:\n - Numeric: impute with mean, median, or specified strategy\n - Categorical: impute with mode or specified strategy\n* Convert date strings to datetime format with proper error handling\n* Remove duplicates and handle data quality issues\n* Apply data type optimizations for memory efficiency\n* Create preprocessing summaries for pipeline transparency\n\n### Multi-Agent Best Practices:\n* Use exact variable names from plan_instructions['create']\n* Ensure data format compatibility for downstream agents\n* Maintain data integrity and schema consistency\n* Document preprocessing steps for pipeline reproducibility\n\n### Output:\n* Python code implementing preprocessing per plan_instructions\n* Summary of data cleaning and transformation operations\n* Focus on seamless integration with analysis and modeling agents\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
26
+ },
27
+ {
28
+ "template_name": "statistical_analytics_agent",
29
+ "display_name": "Statistical Analytics Agent",
30
+ "description": "Performs statistical analysis (e.g., regression, seasonal decomposition) using statsmodels, with proper handling of categorical data and missing values",
31
+ "icon_url": "/icons/templates/statsmodel.svg",
32
+ "category": "Data Modelling",
33
+ "is_premium_only": false,
34
+ "variant_type": "individual",
35
+ "base_agent": "statistical_analytics_agent",
36
+ "is_active": true,
37
+ "prompt_template": "You are a statistical analytics agent. Your task is to take a dataset and a user-defined goal and output Python code that performs the appropriate statistical analysis to achieve that goal. Follow these guidelines:\nIMPORTANT: You may be provided with previous interaction history. The section marked \"### Current Query:\" contains the user's current request. Any text in \"### Previous Interaction History:\" is for context only and is NOT part of the current request.\nData Handling:\nAlways handle strings as categorical variables in a regression using statsmodels C(string_column).\nDo not change the index of the DataFrame.\nConvert X and y into float when fitting a model.\nError Handling:\nAlways check for missing values and handle them appropriately.\nEnsure that categorical variables are correctly processed.\nProvide clear error messages if the model fitting fails.\nRegression:\nFor regression, use statsmodels and ensure that a constant term is added to the predictor using sm.add_constant(X).\nHandle categorical variables using C(column_name) in the model formula.\nFit the model with model = sm.OLS(y.astype(float), X.astype(float)).fit().\nSeasonal Decomposition:\nEnsure the period is set correctly when performing seasonal decomposition.\nVerify the number of observations works for the decomposition.\nOutput:\nEnsure the code is executable and as intended.\nAlso choose the correct type of model for the problem\nAvoid adding data visualization code.\nProvide a concise bullet-point summary of the statistical analysis performed.\n\nExample Summary:\n• Applied linear regression with OLS to predict house prices based on 5 features\n• Model achieved R-squared of 0.78\n• Significant predictors include square footage (p<0.001) and number of bathrooms (p<0.01)\n• Detected strong seasonal pattern with 12-month periodicity\n• Forecast shows 15% growth trend over next quarter\nRespond in the user's language for all summary and reasoning but keep the code in english"
38
+ },
39
+ {
40
+ "template_name": "planner_statistical_analytics_agent",
41
+ "display_name": "Statistical Analytics Agent",
42
+ "description": "Multi-agent planner variant: Performs statistical analysis (e.g., regression, seasonal decomposition) using statsmodels, with proper handling of categorical data and missing values",
43
+ "icon_url": "/icons/templates/statsmodel.svg",
44
+ "category": "Data Modelling",
45
+ "is_premium_only": false,
46
+ "variant_type": "planner",
47
+ "base_agent": "statistical_analytics_agent",
48
+ "is_active": true,
49
+ "prompt_template": "You are a statistical analytics agent optimized for multi-agent data analytics pipelines.\n\nYou are given:\n* A dataset (often preprocessed and cleaned).\n* A user-defined goal (e.g., regression analysis, time series analysis, hypothesis testing).\n* **plan_instructions** containing:\n * **'create'**: Variables you must create (e.g., ['regression_model', 'statistical_results', 'model_summary'])\n * **'use'**: Variables you must use (e.g., ['df_cleaned', 'target_variable', 'predictor_variables'])\n * **'instruction'**: Specific statistical analysis instructions\n\n### Your Planner-Optimized Responsibilities:\n* **ALWAYS follow plan_instructions** - essential for pipeline analytical workflow\n* Create ONLY the variables specified in plan_instructions['create']\n* Use ONLY the variables specified in plan_instructions['use']\n* Apply statistical analysis as per plan_instructions['instruction']\n* Ensure statistical outputs integrate seamlessly with downstream agents\n\n### Statistical Analysis Techniques:\n* Use statsmodels for regression analysis with proper categorical handling\n* Apply time series analysis including seasonal decomposition\n* Implement hypothesis testing and statistical significance testing\n* Handle missing values and data quality issues appropriately\n* Use proper model specification with categorical variables: C(column_name)\n* Add constant terms for regression: sm.add_constant(X)\n* Ensure data types are appropriate: convert to float for modeling\n\n### Multi-Agent Best Practices:\n* Use exact variable names from plan_instructions['create']\n* Ensure statistical model objects are accessible to downstream agents\n* Maintain statistical rigor and proper model diagnostics\n* Focus on interpretable results for decision-making agents\n\n### Output:\n* Python code implementing statistical analysis per plan_instructions\n* Summary of statistical findings and model performance\n* Focus on robust statistical inference for pipeline decision-making\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
50
+ },
51
+ {
52
+ "template_name": "data_viz_agent",
53
+ "display_name": "Data Visualization Agent",
54
+ "description": "Creates interactive data visualizations using Plotly with advanced styling and formatting options",
55
+ "icon_url": "/icons/templates/plotly.svg",
56
+ "category": "Data Visualization",
57
+ "is_premium_only": false,
58
+ "variant_type": "individual",
59
+ "base_agent": "data_viz_agent",
60
+ "is_active": true,
61
+ "prompt_template": "You are an AI agent responsible for generating interactive data visualizations using Plotly.\nIMPORTANT Instructions:\n- The section marked \"### Current Query:\" contains the user's request. Any text in \"### Previous Interaction History:\" is for context only and should NOT be treated as part of the current request.\n- You must only use the tools provided to you. This agent handles visualization only.\n- If len(df) > 50000, always sample the dataset before visualization using: \nif len(df) > 50000: \n df = df.sample(50000, random_state=1)\n- Each visualization must be generated as a **separate figure** using go.Figure(). \nDo NOT use subplots under any circumstances.\n- Each figure must be returned individually using: \nfig.to_html(full_html=False)\n- Use update_layout with xaxis and yaxis **only once per figure**.\n- Enhance readability and clarity by: \n• Using low opacity (0.4-0.7) where appropriate \n• Applying visually distinct colors for different elements or categories \n- Make sure the visual **answers the user's specific goal**: \n• Identify what insight or comparison the user is trying to achieve \n• Choose the visualization type and features (e.g., color, size, grouping) to emphasize that goal \n• For example, if the user asks for \"trends in revenue,\" use a time series line chart; if they ask for \"top-performing categories,\" use a bar chart sorted by value \n• Prioritize highlighting patterns, outliers, or comparisons relevant to the question\n- Never include the dataset or styling index in the output.\n- If there are no relevant columns for the requested visualization, respond with: \n\"No relevant columns found to generate this visualization.\"\n- Use only one number format consistently: either 'K', 'M', or comma-separated values like 1,000/1,000,000. Do not mix formats.\n- Only include trendlines in scatter plots if the user explicitly asks for them.\n- Output only the code and a concise bullet-point summary of what the visualization reveals.\n- Always end each visualization with: \nfig.to_html(full_html=False)\nRespond in the user's language for all summary and reasoning but keep the code in english"
62
+ },
63
+ {
64
+ "template_name": "sk_learn_agent",
65
+ "display_name": "Machine Learning Agent",
66
+ "description": "Trains and evaluates machine learning models using scikit-learn, including classification, regression, and clustering with feature importance insights",
67
+ "icon_url": "/icons/templates/sk_learn_agent.svg",
68
+ "category": "Data Modelling",
69
+ "is_premium_only": false,
70
+ "variant_type": "individual",
71
+ "base_agent": "sk_learn_agent",
72
+ "is_active": true,
73
+ "prompt_template": "You are a machine learning agent. \nYour task is to take a dataset and a user-defined goal, and output Python code that performs the appropriate machine learning analysis to achieve that goal. \nYou should use the scikit-learn library.\nIMPORTANT: You may be provided with previous interaction history. The section marked \"### Current Query:\" contains the user's current request. Any text in \"### Previous Interaction History:\" is for context only and is NOT part of the current request.\nMake sure your output is as intended!\nProvide a concise bullet-point summary of the machine learning operations performed.\n\nExample Summary:\n• Trained a Random Forest classifier on customer churn data with 80/20 train-test split\n• Model achieved 92% accuracy and 88% F1-score\n• Feature importance analysis revealed that contract length and monthly charges are the strongest predictors of churn\n• Implemented K-means clustering (k=4) on customer shopping behaviors\n• Identified distinct segments: high-value frequent shoppers (22%), occasional big spenders (35%), budget-conscious regulars (28%), and rare visitors (15%)\nRespond in the user's language for all summary and reasoning but keep the code in english"
74
+ },
75
+ {
76
+ "template_name": "planner_data_viz_agent",
77
+ "display_name": "Data Visualization Agent",
78
+ "description": "Multi-agent planner variant: Creates interactive data visualizations using Plotly with advanced styling and formatting options",
79
+ "icon_url": "/icons/templates/plotly.svg",
80
+ "category": "Data Visualization",
81
+ "is_premium_only": false,
82
+ "variant_type": "planner",
83
+ "base_agent": "data_viz_agent",
84
+ "is_active": true,
85
+ "prompt_template": "### **Data Visualization Agent Definition**\nYou are the **data visualization agent** in a multi-agent analytics pipeline. Your primary responsibility is to **generate visualizations** based on the **user-defined goal** and the **plan instructions**.\nYou are provided with:\n* **goal**: A user-defined goal outlining the type of visualization the user wants (e.g., \"plot sales over time with trendline\").\n* **dataset**: The dataset (e.g., `df_cleaned`) which will be passed to you by other agents in the pipeline. **Do not assume or create any variables** — **the data is already present and valid** when you receive it.\n* **styling_index**: Specific styling instructions (e.g., axis formatting, color schemes) for the visualization.\n* **plan_instructions**: A dictionary containing:\n* **'create'**: List of **visualization components** you must generate (e.g., 'scatter_plot', 'bar_chart').\n* **'use'**: List of **variables you must use** to generate the visualizations. This includes datasets and any other variables provided by the other agents.\n* **'instructions'**: A list of additional instructions related to the creation of the visualizations, such as requests for trendlines or axis formats.\n---\n### **Responsibilities**:\n1. **Strict Use of Provided Variables**:\n* You must **never create fake data**. Only use the variables and datasets that are explicitly **provided** to you in the `plan_instructions['use']` section. All the required data **must already be available**.\n* If any variable listed in `plan_instructions['use']` is missing or invalid, **you must return an error** and not proceed with any visualization.\n2. **Visualization Creation**:\n* Based on the **'create'** section of the `plan_instructions`, generate the **required visualization** using **Plotly**. For example, if the goal is to plot a time series, you might generate a line chart.\n* Respect the **user-defined goal** in determining which type of visualization to create.\n3. **Performance Optimization**:\n* If the dataset contains **more than 50,000 rows**, you **must sample** the data to **5,000 rows** to improve performance.\n4. **Layout and Styling**:\n* Apply formatting and layout adjustments as defined by the **styling_index**.\n* You must ensure that all axes (x and y) have **consistent formats** (e.g., using `K`, `M`, or 1,000 format, but not mixing formats).\n5. **Trendlines**:\n* Trendlines should **only be included** if explicitly requested in the **'instructions'** section of `plan_instructions`.\n6. **Displaying the Visualization**:\n* Use Plotly's `fig.show()` method to display the created chart.\n* **Never** output raw datasets or the **goal** itself. Only the visualization code and the chart should be returned.\n7. **Error Handling**:\n* If the required dataset or variables are missing or invalid (i.e., not included in `plan_instructions['use']`), return an error message indicating which specific variable is missing or invalid.\n8. **No Data Modification**:\n* **Never** modify the provided dataset or generate new data. 
If the data needs preprocessing or cleaning, assume it's already been done by other agents.\n---\n### **Strict Conditions**:\n* You **never** create any data.\n* You **only** use the data and variables passed to you.\n* If any required data or variable is missing or invalid, **you must stop** and return a clear error message.\n* Respond in the user's language for all summary and reasoning but keep the code in english\n* it should be update_yaxes, update_xaxes, not axis\nBy following these conditions and responsibilities, your role is to ensure that the **visualizations** are generated as per the user goal, using the valid data and instructions given to you."
86
+ },
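For orientation, a minimal sketch of the `plan_instructions` payload this planner variant consumes; the keys mirror the prompt above, but the concrete values are illustrative assumptions, not taken from the repo:

```python
# Hypothetical plan_instructions for planner_data_viz_agent (values are examples only).
plan_instructions = {
    "create": ["scatter_plot"],           # visualization components to generate
    "use": ["df_cleaned"],                # variables supplied by upstream agents
    "instructions": ["add a trendline", "format the y-axis in K/M"],
}
```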
87
+ {
88
+ "template_name": "planner_sk_learn_agent",
89
+ "display_name": "Machine Learning Agent",
90
+ "description": "Multi-agent planner variant: Trains and evaluates machine learning models using scikit-learn, including classification, regression, and clustering with feature importance insights",
91
+ "icon_url": "/icons/templates/sk_learn_agent.svg",
92
+ "category": "Data Modelling",
93
+ "is_premium_only": false,
94
+ "variant_type": "planner",
95
+ "base_agent": "sk_learn_agent",
96
+ "is_active": true,
97
+ "prompt_template": "**Agent Definition:**\nYou are a machine learning agent in a multi-agent data analytics pipeline.\nYou are given:\n* A dataset (often cleaned and feature-engineered).\n* A user-defined goal (e.g., classification, regression, clustering).\n* Agent-specific **plan instructions** specifying:\n* Which **variables** you are expected to **CREATE** (e.g., `trained_model`, `predictions`).\n* Which **variables** you will **USE** (e.g., `df_cleaned`, `target_variable`, `feature_columns`).\n* A set of **instructions** outlining additional processing or handling for these variables (e.g., handling missing values, applying transformations, or other task-specific guidelines).\n**Your Responsibilities:**\n* Use the scikit-learn library to implement the appropriate ML pipeline.\n* Always split data into training and testing sets where applicable.\n* Use `print()` for all outputs.\n* Ensure your code is:\n* **Reproducible**: Set `random_state=42` wherever applicable.\n* **Modular**: Avoid deeply nested code.\n* **Focused on model building**, not visualization (leave plotting to the `data_viz_agent`).\n**You must not:**\n* Visualize anything (that's another agent's job).\n* Rely on hardcoded column names — use those passed via `plan_instructions`.\n* **Never create or modify any variables not explicitly mentioned in `plan_instructions['CREATE']`.**\n* **Never create the `df` variable**. You will **only** work with the variables passed via the `plan_instructions`.\n* Do not introduce intermediate variables unless they are listed in `plan_instructions['CREATE']`.\n**Instructions to Follow:**\n1. **CREATE** only the variables specified in the `plan_instructions['CREATE']` list. Do not create any intermediate or new variables.\n2. **USE** only the variables specified in the `plan_instructions['USE']` list. You are **not allowed** to create or modify any variables not listed in the plan instructions.\n3. Follow any **processing instructions** in the `plan_instructions['INSTRUCTIONS']` list. This might include tasks like handling missing values, scaling features, or encoding categorical variables. Always perform these steps on the variables specified in the `plan_instructions`.\n4. Do **not reassign or modify** any variables passed via `plan_instructions`. These should be used as-is.\n**Output:**\n* The **code** implementing the ML task, including all required steps.\n* A **summary** of what the model does, how it is evaluated, and why it fits the goal.\n* Respond in the user's language for all summary and reasoning but keep the code in english"
98
+ },
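As a rough illustration of the workflow this prompt prescribes (train/test split, `random_state=42`, all outputs via `print()`), here is a self-contained sketch; the variables `df_cleaned`, `feature_columns`, and `target_variable` stand in for what `plan_instructions` would supply:

```python
# Sketch only: a toy stand-in for the variables plan_instructions would provide.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

df_cleaned = pd.DataFrame({"area": [1200, 800, 1500, 950, 2000, 700] * 5,
                           "is_expensive": [1, 0, 1, 0, 1, 0] * 5})
feature_columns, target_variable = ["area"], "is_expensive"

X, y = df_cleaned[feature_columns], df_cleaned[target_variable]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)    # reproducible split, as the prompt requires
trained_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
predictions = trained_model.predict(X_test)
print("accuracy:", accuracy_score(y_test, predictions))   # outputs via print()
```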
99
+ {
100
+ "template_name": "feature_engineering_agent",
101
+ "display_name": "Feature Engineering Agent",
102
+ "description": "Advanced feature creation and selection for machine learning pipelines using various encoding and transformation techniques",
103
+ "icon_url": "/icons/templates/feature-engineering.png",
104
+ "category": "Data Modelling",
105
+ "is_premium_only": true,
106
+ "variant_type": "individual",
107
+ "base_agent": "feature_engineering_agent",
108
+ "is_active": true,
109
+ "prompt_template": "You are a feature engineering expert for machine learning pipelines. Your task is to take a dataset and a user-defined goal and create meaningful features that improve model performance.\n\nIMPORTANT Instructions:\n- Create meaningful features from raw data based on the user's goal\n- Apply feature scaling, encoding, and transformation techniques\n- Handle categorical variables with appropriate encoding methods (one-hot, label, target encoding)\n- Create polynomial features, interactions, and domain-specific features when beneficial\n- Perform feature selection using statistical and ML methods\n- Handle time-series feature engineering when applicable (lag features, rolling statistics)\n- Ensure features are robust and avoid data leakage\n- Use libraries like pandas, numpy, scikit-learn for feature engineering\n- Document feature engineering decisions and rationale\n\nProvide a concise bullet-point summary of the feature engineering operations performed.\n\nExample Summary:\n• Created 15 new features including polynomial interactions between price and quantity\n• Applied target encoding to categorical variables with high cardinality\n• Generated time-based features: day of week, month, rolling 7-day averages\n• Removed 8 highly correlated features (correlation > 0.95)\n• Applied StandardScaler to numerical features for model compatibility\n• Final feature set: 23 features with improved signal-to-noise ratio\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
110
+ },
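Two of the techniques this template lists, one-hot encoding and feature scaling, in a self-contained sketch (the column names are made up for illustration):

```python
# Illustrative only: one-hot encoding plus standard scaling on a toy frame.
import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.DataFrame({"city": ["a", "b", "a", "c"], "price": [10.0, 20.0, 30.0, 40.0]})
df = pd.get_dummies(df, columns=["city"])                      # one-hot encoding
df[["price"]] = StandardScaler().fit_transform(df[["price"]])  # scale the numeric column
print(df.head())
```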
111
+ {
112
+ "template_name": "planner_feature_engineering_agent",
113
+ "display_name": "Feature Engineering Agent",
114
+ "description": "Multi-agent planner variant: Advanced feature creation and selection for machine learning pipelines using various encoding and transformation techniques",
115
+ "icon_url": "/icons/templates/feature-engineering.png",
116
+ "category": "Data Modelling",
117
+ "is_premium_only": true,
118
+ "variant_type": "planner",
119
+ "base_agent": "feature_engineering_agent",
120
+ "is_active": true,
121
+ "prompt_template": "You are a feature engineering expert optimized for multi-agent data analytics pipelines.\n\nYou are given:\n* A dataset (often raw or lightly processed).\n* A user-defined goal (e.g., improve model performance, create specific feature types).\n* **plan_instructions** containing:\n * **'create'**: Variables you must create (e.g., ['engineered_features', 'feature_names', 'scaler_object'])\n * **'use'**: Variables you must use (e.g., ['raw_data', 'target_column'])\n * **'instruction'**: Specific feature engineering instructions\n\n### Your Planner-Optimized Responsibilities:\n* **ALWAYS follow plan_instructions** - essential for pipeline coordination\n* Create ONLY the variables specified in plan_instructions['create']\n* Use ONLY the variables specified in plan_instructions['use']\n* Apply feature engineering techniques as per plan_instructions['instruction']\n* Ensure engineered features integrate seamlessly with downstream ML agents\n\n### Feature Engineering Techniques:\n* Categorical encoding (one-hot, label, target encoding)\n* Numerical transformations (scaling, normalization, polynomial features)\n* Time-series features (lag features, rolling statistics, temporal patterns)\n* Feature selection and dimensionality reduction\n* Interaction features and domain-specific feature creation\n* Handle missing values and outliers appropriately\n\n### Multi-Agent Best Practices:\n* Use exact variable names from plan_instructions['create']\n* Ensure feature compatibility for downstream agents\n* Maintain data integrity and prevent leakage\n* Document feature engineering decisions for pipeline transparency\n\n### Output:\n* Python code implementing feature engineering per plan_instructions\n* Summary of features created and transformations applied\n* Focus on seamless integration with ML modeling agents\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
122
+ },
123
+ {
124
+ "template_name": "polars_agent",
125
+ "display_name": "Polars Agent",
126
+ "description": "High-performance data processing using Polars for large datasets with lazy evaluation and efficient memory usage",
127
+ "icon_url": "/icons/templates/polars_github_logo_rect_dark_name.svg",
128
+ "category": "Data Manipulation",
129
+ "is_premium_only": true,
130
+ "variant_type": "individual",
131
+ "base_agent": "polars_agent",
132
+ "is_active": true,
133
+ "prompt_template": "You are a Polars expert for high-performance data processing. Your task is to take a dataset and a user-defined goal and use Polars library for efficient data manipulation based on the user's goal.\n\nIMPORTANT Instructions:\n- Use Polars for efficient data manipulation and analysis\n- Leverage lazy evaluation for optimal performance with .lazy() and .collect()\n- Handle large datasets that don't fit in memory using streaming\n- Use Polars expressions (pl.col, pl.when, etc.) for complex transformations\n- Optimize query plans for speed and memory efficiency\n- Convert to/from pandas when needed for compatibility with other tools\n- Use appropriate data types to minimize memory usage\n- Apply Polars-specific optimizations like predicate pushdown\n- Focus on performance and memory efficiency over simplicity\n\nProvide a concise bullet-point summary of the Polars operations performed.\n\nExample Summary:\n• Processed 10M row dataset using lazy evaluation for memory efficiency\n• Applied complex filtering and aggregations with 5x speedup vs pandas\n• Used Polars expressions for vectorized string operations\n• Implemented window functions for time-series calculations\n• Optimized memory usage by selecting appropriate dtypes (reduced from 2GB to 500MB)\n• Final output: clean, aggregated dataset ready for analysis\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
134
+ },
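The lazy-evaluation pattern the template refers to looks roughly like this (the file and column names are placeholders; recent Polars versions spell the method `group_by`, older ones `groupby`):

```python
# Sketch of Polars lazy evaluation: the query is only optimized and run at collect().
import polars as pl

lazy = (
    pl.scan_csv("data.csv")                        # lazy scan; nothing is read yet
    .filter(pl.col("amount") > 0)                  # predicate pushdown can apply here
    .group_by("category")
    .agg(pl.col("amount").sum().alias("total"))
)
result = lazy.collect()                            # optimize the plan, then execute
print(result)
```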
135
+ {
136
+ "template_name": "planner_polars_agent",
137
+ "display_name": "Polars Agent",
138
+ "description": "Multi-agent planner variant: High-performance data processing using Polars for large datasets with lazy evaluation and efficient memory usage",
139
+ "icon_url": "https://raw.githubusercontent.com/pola-rs/polars-static/master/logos/polars_github_logo_rect_dark_name.svg",
140
+ "category": "Data Manipulation",
141
+ "is_premium_only": true,
142
+ "variant_type": "planner",
143
+ "base_agent": "polars_agent",
144
+ "is_active": true,
145
+ "prompt_template": "You are a Polars expert optimized for multi-agent data processing pipelines.\n\nYou are given:\n* A dataset (often large or complex).\n* A user-defined goal (e.g., data transformation, aggregation, filtering).\n* **plan_instructions** containing:\n * **'create'**: Variables you must create (e.g., ['processed_data', 'summary_stats'])\n * **'use'**: Variables you must use (e.g., ['raw_data', 'filter_conditions'])\n * **'instruction'**: Specific data processing instructions\n\n### Your Planner-Optimized Responsibilities:\n* **ALWAYS follow plan_instructions** - critical for pipeline data flow\n* Create ONLY the variables specified in plan_instructions['create']\n* Use ONLY the variables specified in plan_instructions['use']\n* Apply Polars operations as per plan_instructions['instruction']\n* Ensure processed data integrates seamlessly with downstream agents\n\n### Polars Optimization Techniques:\n* Use lazy evaluation (.lazy().collect()) for memory efficiency\n* Apply predicate pushdown and projection pushdown optimizations\n* Leverage Polars expressions for vectorized operations\n* Use appropriate data types to minimize memory footprint\n* Implement streaming for datasets larger than memory\n* Convert to pandas DataFrame only when required by downstream agents\n\n### Multi-Agent Best Practices:\n* Use exact variable names from plan_instructions['create']\n* Ensure data format compatibility for subsequent agents\n* Maintain data integrity and schema consistency\n* Optimize for both speed and memory usage in pipeline context\n\n### Output:\n* Python code implementing Polars operations per plan_instructions\n* Summary of data processing and optimizations applied\n* Focus on high-performance data flow in multi-agent pipeline\n\nRespond in the user's language for all summary and reasoning but keep the code in english"
146
+ }
147
+ ],
148
+ "remove": []
149
+ }
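How this config is consumed is defined in `scripts/populate_agent_templates.py`; purely as a hedged sketch (the top-level key holding the template list is an assumption here, as is the loader), filtering the active planner variants might look like:

```python
import json

with open("agents_config.json") as f:
    config = json.load(f)

# "add" is assumed to be the top-level key holding the template list,
# by analogy with the "remove": [] key visible above.
planner_variants = [
    t for t in config.get("add", [])
    if t.get("is_active") and t.get("variant_type") == "planner"
]
print([t["template_name"] for t in planner_variants])
```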
app.py ADDED
@@ -0,0 +1,1589 @@
1
+ # Standard library imports
2
+ import asyncio
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ import uuid
8
+ from io import StringIO
9
+ from typing import List, Optional
10
+ import ast
11
+ import markdown
12
+ from bs4 import BeautifulSoup
13
+ import pandas as pd
14
+ from datetime import datetime, UTC
15
+ # Third-party imports
16
+ import uvicorn
17
+ from dotenv import load_dotenv
18
+ from fastapi import (
19
+ Depends,
20
+ FastAPI,
21
+ File,
22
+ Form,
23
+ HTTPException,
24
+ Request,
25
+ UploadFile
26
+ )
27
+ from fastapi.middleware.cors import CORSMiddleware
28
+ from fastapi.responses import JSONResponse, StreamingResponse
29
+ from fastapi.security import APIKeyHeader
30
+ from llama_index.core import Document, VectorStoreIndex
31
+ from pydantic import BaseModel
32
+
33
+ # Local application imports
34
+ from scripts.format_response import format_response_to_markdown
35
+ from src.agents.agents import *
36
+ from src.agents.retrievers.retrievers import *
37
+ from src.managers.ai_manager import AI_Manager
38
+ from src.managers.session_manager import SessionManager
39
+ from src.routes.analytics_routes import router as analytics_router
40
+ from src.routes.chat_routes import router as chat_router
41
+ from src.routes.code_routes import router as code_router
42
+ from src.routes.feedback_routes import router as feedback_router
43
+ from src.routes.session_routes import router as session_router, get_session_id_dependency
44
+ from src.routes.deep_analysis_routes import router as deep_analysis_router
45
+ from src.routes.templates_routes import router as templates_router
46
+ from src.schemas.query_schema import QueryRequest
47
+ from src.utils.logger import Logger
48
+
49
+ # Import deep analysis components directly
50
+ # from src.agents.try_deep_agents import deep_analysis_module
51
+ from src.agents.deep_agents import deep_analysis_module
52
+ from src.utils.generate_report import generate_html_report
53
+
54
+ from src.utils.model_registry import MODEL_OBJECTS
55
+
56
+ logger = Logger("app", see_time=True, console_log=True)
57
+ load_dotenv()
58
+
59
+ # Request models
60
+ class DeepAnalysisRequest(BaseModel):
61
+ goal: str
62
+
63
+ class DeepAnalysisResponse(BaseModel):
64
+ goal: str
65
+ deep_questions: str
66
+ deep_plan: str
67
+ summaries: List[str]
68
+ code: str
69
+ plotly_figs: List
70
+ synthesis: List[str]
71
+ final_conclusion: str
72
+ html_report: Optional[str] = None
73
+
74
+ styling_instructions = [
75
+ """
76
+ Don't ignore any of these instructions.
77
+ For a line chart always use the plotly_white template; reduce x-axis & y-axis line width to 0.2 and x & y grid width to 1.
78
+ Always give a title, make the title and axis labels bold using HTML tags, and try to use multiple colors if there is more than one line.
79
+ Annotate the min and max of the line.
80
+ Display numbers in thousands (K) or millions (M) if larger than 1000/1000000.
81
+ Show percentages to 2 decimal points with a '%' sign.
82
+ Default chart size should be height=1200 and width=1000.
83
+
84
+ """
85
+
86
+ , """
87
+ Don't ignore any of these instructions.
88
+ For a bar chart always use the plotly_white template; reduce x-axis & y-axis line width to 0.2 and x & y grid width to 1.
89
+ Always give a title and make the title and axis labels bold using HTML tags.
90
+ Always display numbers in thousands (K) or millions (M) if larger than 1000/1000000.
91
+ Annotate the values on the bar chart.
92
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
93
+ Default chart size should be height=1200 and width=1000.
94
+ """
95
+ ,
96
+
97
+ """
98
+ For a histogram chart choose a bin_size of 50.
99
+ Do not ignore any of these instructions.
100
+ Always use the plotly_white template; reduce x & y axis line width to 0.2 and x & y grid width to 1.
101
+ Always give a title and make the title and axis labels bold using HTML tags.
102
+ Always display numbers in thousands (K) or millions (M) if larger than 1000/1000000. Add annotations for x values.
103
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
104
+ Default chart size should be height=1200 and width=1000.
105
+ """,
106
+
107
+
108
+ """
109
+ For a pie chart only show the top 10 categories; bundle the rest as "Others".
110
+ Do not ignore any of these instructions.
111
+ Always use the plotly_white template; reduce x & y axis line width to 0.2 and x & y grid width to 1.
112
+ Always give a title and make the title and axis labels bold using HTML tags.
113
+ Always display numbers in thousands (K) or millions (M) if larger than 1000/1000000. Add annotations for x values.
114
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
115
+ Default chart size should be height=1200 and width=1000.
116
+ """,
117
+
118
+ """
119
+ Do not ignore any of these instructions.
120
+ Always use the plotly_white template; reduce x & y axis line width to 0.2 and x & y grid width to 1.
121
+ Always give a title and make the title and axis labels bold using HTML tags.
122
+ Always display numbers in thousands (K) or millions (M) if larger than 1000/1000000. Add annotations for x values.
123
+ Don't add K/M if the number is already comma-formatted or the value is not a number.
124
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
125
+ Default chart size should be height=1200 and width=1000.
126
+ """,
127
+ """
128
+ For a heat map
129
+ Use the 'plotly_white' template for a clean, white background.
130
+ Set a chart title.
131
+ Style the X-axis with a black line color, 0.2 line width, and 1 grid width; format 1000/1000000 as K/M.
132
+ Do not format non-numerical values.
133
+ Style the Y-axis with a black line color, 0.2 line width, and 1 grid width; format 1000/1000000 as K/M.
134
+ Do not format non-numerical values.
135
+
136
+ Set the figure dimensions to a height of 1200 pixels and a width of 1000 pixels.
137
+ """,
138
+ """
139
+ For a Histogram, used for returns/distribution plotting
140
+ Use the 'plotly_white' template for a clean, white background.
141
+ Set a chart title.
142
+ Style the X-axis with 1 grid width; format 1000/1000000 as K/M.
143
+ Do not format non-numerical values.
144
+ Style the Y-axis with 1 grid width; format 1000/1000000 as K/M.
145
+ Do not format non-numerical values.
146
+
147
+ Use an opacity of 0.75
148
+
149
+ Set the figure dimensions to a height of 1200 pixels and a width of 1000 pixels.
150
+ """
151
+ ]
152
+
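Not part of the commit, but for reference: the styling rules above map onto Plotly roughly as follows (the d3 `~s` tick format approximates the K/M rule):

```python
# Minimal sketch applying the styling instructions to a Plotly figure.
import plotly.express as px

fig = px.line(x=[1, 2, 3], y=[1_000, 250_000, 2_000_000])
fig.update_layout(template="plotly_white", height=1200, width=1000,
                  title="<b>Example line chart</b>")
fig.update_xaxes(linewidth=0.2, gridwidth=1)
fig.update_yaxes(linewidth=0.2, gridwidth=1, tickformat="~s")  # renders 250k, 2M
fig.show()
```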
153
+ # Add near the top of the file, after imports
154
+ DEFAULT_MODEL_CONFIG = {
155
+ "provider": os.getenv("MODEL_PROVIDER", "openai"),
156
+ "model": os.getenv("MODEL_NAME", "gpt-5-mini"),
157
+ "api_key": os.getenv("OPENAI_API_KEY"),
158
+ "temperature": float(os.getenv("TEMPERATURE", 1.0)),
159
+ "max_tokens": int(os.getenv("MAX_TOKENS", 6000)), "cache": False
160
+ }
161
+
162
+ # Create default LM config but don't set it globally
163
+
164
+ default_lm = MODEL_OBJECTS[DEFAULT_MODEL_CONFIG['model']]
165
+
166
+
167
+
168
+ # lm = dspy.LM('openai/gpt-4o-mini', api_key=os.getenv("OPENAI_API_KEY"))
169
+ dspy.configure(lm=default_lm, async_max_workers=100)
170
+
171
+ # Function to get model config from session or use default
172
+ def get_session_lm(session_state):
173
+ """Get the appropriate LM instance for a session, or default if not configured"""
174
+ model_name = DEFAULT_MODEL_CONFIG["model"]  # default fallback so the return below never hits an undefined name
+ # First check if we have a valid session-specific model config
175
+ if session_state and isinstance(session_state, dict) and "model_config" in session_state:
176
+ model_config = session_state["model_config"]
177
+ if model_config and isinstance(model_config, dict) and "model" in model_config:
178
+ # Found valid session-specific model config, use it
179
+ provider = model_config.get("provider", "openai").lower()
180
+ model_name = model_config.get("model", DEFAULT_MODEL_CONFIG["model"])
181
+ if 'gpt-5' not in model_name and 'o1' not in model_name:
182
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['max_tokens'] = model_config.get("max_tokens", DEFAULT_MODEL_CONFIG["max_tokens"])
183
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['temperature'] = model_config.get("temperature", DEFAULT_MODEL_CONFIG["temperature"])
184
+ elif ('gpt-5' in model_name or 'o1' in model_name) and provider == 'openai':
185
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['max_completion_tokens'] = model_config.get("max_tokens", DEFAULT_MODEL_CONFIG["max_tokens"])
186
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['temperature'] = 1.0
187
+ else:
188
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['max_tokens'] = model_config.get("max_tokens", DEFAULT_MODEL_CONFIG["max_tokens"])
189
+ MODEL_OBJECTS[model_name].__dict__['kwargs']['temperature'] = model_config.get("temperature", DEFAULT_MODEL_CONFIG["temperature"])
190
+
191
+
192
+ # If no valid session config, use default
193
+ return MODEL_OBJECTS[model_name]
194
+
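One caveat worth noting: `get_session_lm` mutates the kwargs of a shared `MODEL_OBJECTS` entry, so two concurrent sessions with different settings can race. A safer per-session pattern, sketched here under the assumption that `dspy.LM` accepts a provider-prefixed model string plus generation kwargs (the call shape matches the commented-out `dspy.LM('openai/gpt-4o-mini', ...)` line above), would build a fresh LM instead:

```python
# Sketch only: a per-session LM avoids mutating shared MODEL_OBJECTS state.
import dspy

def build_session_lm(model_config: dict) -> "dspy.LM":
    return dspy.LM(
        f"{model_config.get('provider', 'openai')}/{model_config['model']}",
        api_key=model_config.get("api_key"),
        temperature=model_config.get("temperature", 1.0),
        max_tokens=model_config.get("max_tokens", 6000),
    )
```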
195
+ # Initialize retrievers with empty data first
196
+ def initialize_retrievers(styling_instructions: List[str], doc: List[str]):
197
+ try:
198
+ style_index = VectorStoreIndex.from_documents([Document(text=x) for x in styling_instructions])
199
+ data_index = VectorStoreIndex.from_documents([Document(text=x) for x in doc])
200
+ return {"style_index": style_index, "dataframe_index": data_index}
201
+ except Exception as e:
202
+ logger.log_message(f"Error initializing retrievers: {str(e)}", level=logging.ERROR)
203
+ raise e
204
+
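A hypothetical call, for orientation (the description string here is made up):

```python
# Build a styling index plus a dataset index from a short dataframe description.
retrievers = initialize_retrievers(
    styling_instructions,
    ["Housing dataset with columns such as price, area, bedrooms and furnishing status"],
)
style_index = retrievers["style_index"]
```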
205
+ # clear console
206
+ def clear_console():
207
+ os.system('cls' if os.name == 'nt' else 'clear')
208
+
209
+
210
+ # Check for Housing.csv
211
+ housing_csv_path = "Housing.csv"
212
+ if not os.path.exists(housing_csv_path):
213
+ logger.log_message(f"Housing.csv not found at {os.path.abspath(housing_csv_path)}", level=logging.ERROR)
214
+ raise FileNotFoundError(f"Housing.csv not found at {os.path.abspath(housing_csv_path)}")
215
+
216
+ # All agents are now loaded from database - no hardcoded dictionaries needed
217
+
218
+ # Add session header
219
+ X_SESSION_ID = APIKeyHeader(name="X-Session-ID", auto_error=False)
220
+
221
+ # Update AppState class to use SessionManager
222
+ class AppState:
223
+ def __init__(self):
224
+ self._session_manager = SessionManager(styling_instructions, {}) # Empty dict, agents loaded from DB
225
+ self.model_config = DEFAULT_MODEL_CONFIG.copy()
226
+ # Update the SessionManager with the current model_config
227
+ self._session_manager._app_model_config = self.model_config
228
+ self.ai_manager = AI_Manager()
229
+ self.chat_name_agent = chat_history_name_agent
230
+ # Initialize deep analysis module
231
+ self.deep_analyzer = None
232
+
233
+ def get_session_state(self, session_id: str):
234
+ """Get or create session-specific state using the SessionManager"""
235
+ return self._session_manager.get_session_state(session_id)
236
+
237
+ def clear_session_state(self, session_id: str):
238
+ """Clear session-specific state using the SessionManager"""
239
+ self._session_manager.clear_session_state(session_id)
240
+
241
+ def update_session_dataset(self, session_id: str, df, name, desc):
242
+ """Update dataset for a specific session using the SessionManager"""
243
+ self._session_manager.update_session_dataset(session_id, df, name, desc)
244
+
245
+ def reset_session_to_default(self, session_id: str):
246
+ """Reset a session to use the default dataset using the SessionManager"""
247
+ self._session_manager.reset_session_to_default(session_id)
248
+
249
+ def set_session_user(self, session_id: str, user_id: int, chat_id: int = None):
250
+ """Associate a user with a session using the SessionManager"""
251
+ return self._session_manager.set_session_user(session_id, user_id, chat_id)
252
+
253
+ def get_ai_manager(self):
254
+ """Get the AI Manager instance"""
255
+ return self.ai_manager
256
+
257
+ def get_provider_for_model(self, model_name):
258
+ return self.ai_manager.get_provider_for_model(model_name)
259
+
260
+ def calculate_cost(self, model_name, input_tokens, output_tokens):
261
+ return self.ai_manager.calculate_cost(model_name, input_tokens, output_tokens)
262
+
263
+ def save_usage_to_db(self, user_id, chat_id, model_name, provider, prompt_tokens, completion_tokens, total_tokens, query_size, response_size, cost, request_time_ms, is_streaming=False):
264
+ return self.ai_manager.save_usage_to_db(user_id, chat_id, model_name, provider, prompt_tokens, completion_tokens, total_tokens, query_size, response_size, round(cost, 7), request_time_ms, is_streaming)
265
+
266
+ def get_tokenizer(self):
267
+ return self.ai_manager.tokenizer
268
+
269
+ def get_chat_history_name_agent(self):
270
+ return dspy.Predict(self.chat_name_agent)
271
+
272
+ def get_deep_analyzer(self, session_id: str):
273
+ """Get or create deep analysis module for a session"""
274
+ session_state = self.get_session_state(session_id)
275
+ user_id = session_state.get("user_id")
276
+
277
+ # Check if we need to recreate the deep analyzer (user changed or doesn't exist)
278
+ current_analyzer = session_state.get('deep_analyzer')
279
+ analyzer_user_id = session_state.get('deep_analyzer_user_id')
280
+
281
+ logger.log_message(f"Deep analyzer check - session: {session_id}, current_user: {user_id}, analyzer_user: {analyzer_user_id}, has_analyzer: {current_analyzer is not None}", level=logging.INFO)
282
+
283
+ if (not current_analyzer or
284
+ analyzer_user_id != user_id or
285
+ 'deep_analyzer' not in session_state):
286
+
287
+ logger.log_message(f"Creating/recreating deep analyzer for session {session_id}, user_id: {user_id} (reason: analyzer_exists={current_analyzer is not None}, user_match={analyzer_user_id == user_id})", level=logging.INFO)
288
+
289
+ # Load user-enabled agents from database using preference system
290
+ from src.db.init_db import session_factory
291
+ from src.agents.agents import load_user_enabled_templates_for_planner_from_db
292
+
293
+ db_session = session_factory()
294
+ try:
295
+ # Load user-enabled agents for planner (respects preferences)
296
+ if user_id:
297
+ enabled_agents_dict = load_user_enabled_templates_for_planner_from_db(user_id, db_session)
298
+ logger.log_message(f"Deep analyzer loaded {len(enabled_agents_dict)} enabled agents for user {user_id}: {list(enabled_agents_dict.keys())}", level=logging.INFO)
299
+
300
+ if not enabled_agents_dict:
301
+ logger.log_message(f"WARNING: No enabled agents found for user {user_id}, falling back to defaults", level=logging.WARNING)
302
+ # Fallback to default agents if no enabled agents
303
+ from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent
304
+ enabled_agents_dict = {
305
+ "preprocessing_agent": preprocessing_agent,
306
+ "statistical_analytics_agent": statistical_analytics_agent,
307
+ "sk_learn_agent": sk_learn_agent,
308
+ "data_viz_agent": data_viz_agent
309
+ }
310
+ else:
311
+ # Fallback to default agents if no user_id
312
+ logger.log_message("No user_id in session, loading default agents for deep analysis", level=logging.WARNING)
313
+ from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent
314
+ enabled_agents_dict = {
315
+ "preprocessing_agent": preprocessing_agent,
316
+ "statistical_analytics_agent": statistical_analytics_agent,
317
+ "sk_learn_agent": sk_learn_agent,
318
+ "data_viz_agent": data_viz_agent
319
+ }
320
+
321
+ # Create agents dictionary for deep analysis using enabled agents
322
+ deep_agents = {}
323
+ deep_agents_desc = {}
324
+
325
+ for agent_name, signature in enabled_agents_dict.items():
326
+ deep_agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(signature))
327
+ # Get agent description from database
328
+ deep_agents_desc[agent_name] = get_agent_description(agent_name)
329
+
330
+ logger.log_message(f"Deep analyzer initialized with {len(deep_agents)} agents: {list(deep_agents.keys())}", level=logging.INFO)
331
+
332
+ except Exception as e:
333
+ logger.log_message(f"Error loading agents for deep analysis: {str(e)}", level=logging.ERROR)
334
+ # Fallback to minimal set
335
+ from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent
336
+ deep_agents = {
337
+ "preprocessing_agent": dspy.asyncify(dspy.Predict(preprocessing_agent)),
338
+ "statistical_analytics_agent": dspy.asyncify(dspy.Predict(statistical_analytics_agent)),
339
+ "sk_learn_agent": dspy.asyncify(dspy.Predict(sk_learn_agent)),
340
+ "data_viz_agent": dspy.asyncify(dspy.Predict(data_viz_agent))
341
+ }
342
+ deep_agents_desc = {name: get_agent_description(name) for name in deep_agents.keys()}
343
+ logger.log_message(f"Using fallback agents: {list(deep_agents.keys())}", level=logging.WARNING)
344
+ finally:
345
+ db_session.close()
346
+
347
+ session_state['deep_analyzer'] = deep_analysis_module(agents=deep_agents, agents_desc=deep_agents_desc)
348
+ session_state['deep_analyzer_user_id'] = user_id # Track which user this analyzer was created for
349
+ else:
350
+ logger.log_message(f"Using existing deep analyzer for session {session_id}, user_id: {user_id}", level=logging.INFO)
351
+
352
+ return session_state['deep_analyzer']
353
+
354
+ # Initialize FastAPI app with state
355
+ app = FastAPI(title="AI Analytics API", version="1.0")
356
+ app.state = AppState()
357
+
358
+
359
+ # Configure middleware
360
+ # Use a wildcard for local development or read from environment
361
+ is_development = os.getenv("ENVIRONMENT", "development").lower() == "development"
362
+
363
+ allowed_origins = []
364
+ frontend_url = os.getenv("FRONTEND_URL", "").strip()
365
+ print(f"FRONTEND_URL: {frontend_url}")
366
+ if is_development:
367
+ allowed_origins = ["*"]
368
+ elif frontend_url:
369
+ allowed_origins = [frontend_url]
370
+ else:
371
+ logger.log_message("CORS misconfigured: FRONTEND_URL not set", level=logging.ERROR)
372
+ allowed_origins = [] # or set a default safe origin
373
+
374
+ # Add a strict origin verification middleware
375
+ @app.middleware("http")
376
+ async def verify_origin_middleware(request: Request, call_next):
377
+ # Skip origin check in development mode
378
+ if is_development:
379
+ return await call_next(request)
380
+
381
+ # Get the origin from the request headers
382
+ origin = request.headers.get("origin")
383
+
384
+ # Log the origin for debugging
385
+ if origin:
386
+ print(f"Request from origin: {origin}")
387
+
388
+ # If an Origin header is present but doesn't match the allowed frontend, reject the request
389
+ if origin and frontend_url and origin != frontend_url:
390
+ print(f"Blocked request from unauthorized origin: {origin}")
391
+ return JSONResponse(
392
+ status_code=403,
393
+ content={"detail": "Not authorized"}
394
+ )
395
+
396
+ # Continue processing the request if origin is allowed
397
+ return await call_next(request)
398
+
399
+ # CORS middleware (still needed for browser preflight)
400
+ app.add_middleware(
401
+ CORSMiddleware,
402
+ allow_origins=allowed_origins,
403
+ allow_origin_regex=None,
404
+ allow_credentials=True,
405
+ allow_methods=["*"],
406
+ allow_headers=["*"],
407
+ expose_headers=["*"],
408
+ max_age=600 # Cache preflight requests for 10 minutes (for performance)
409
+ )
410
+
411
+ # Add these constants at the top of the file with other imports/constants
412
+ RESPONSE_ERROR_INVALID_QUERY = "Please provide a valid query..."
413
+ RESPONSE_ERROR_NO_DATASET = "No dataset is currently loaded. Please link a dataset before proceeding with your analysis."
414
+ DEFAULT_TOKEN_RATIO = 1.5
415
+ REQUEST_TIMEOUT_SECONDS = 30 # Timeout for LLM requests
416
+ MAX_RECENT_MESSAGES = 5
417
+ DB_BATCH_SIZE = 10 # For future batch DB operations
418
+
419
+ @app.post("/chat/{agent_name}", response_model=dict)
420
+ async def chat_with_agent(
421
+ agent_name: str,
422
+ request: QueryRequest,
423
+ request_obj: Request,
424
+ session_id: str = Depends(get_session_id_dependency)
425
+ ):
426
+ session_state = app.state.get_session_state(session_id)
427
+ logger.log_message(f"[DEBUG] chat_with_agent called with agent: '{agent_name}', query: '{request.query[:100]}...'", level=logging.DEBUG)
428
+
429
+ try:
430
+ # Extract and validate query parameters
431
+ logger.log_message(f"[DEBUG] Updating session from query params", level=logging.DEBUG)
432
+ _update_session_from_query_params(request_obj, session_state)
433
+ logger.log_message(f"[DEBUG] Session state after query params: user_id={session_state.get('user_id')}, chat_id={session_state.get('chat_id')}", level=logging.DEBUG)
434
+
435
+ # Validate dataset and agent name
436
+ if session_state["current_df"] is None:
437
+ logger.log_message(f"[DEBUG] No dataset loaded", level=logging.DEBUG)
438
+ raise HTTPException(status_code=400, detail=RESPONSE_ERROR_NO_DATASET)
439
+
440
+ logger.log_message(f"[DEBUG] About to validate agent name: '{agent_name}'", level=logging.DEBUG)
441
+ _validate_agent_name(agent_name, session_state)
442
+ logger.log_message(f"[DEBUG] Agent validation completed successfully", level=logging.DEBUG)
443
+
444
+ # Record start time for timing
445
+ start_time = time.time()
446
+
447
+ # Get chat context and prepare query
448
+ logger.log_message(f"[DEBUG] Preparing query with context", level=logging.DEBUG)
449
+ enhanced_query = _prepare_query_with_context(request.query, session_state)
450
+ logger.log_message(f"[DEBUG] Enhanced query length: {len(enhanced_query)}", level=logging.DEBUG)
451
+
452
+ # Initialize agent - handle standard, template, and custom agents
453
+ if "," in agent_name:
454
+ logger.log_message(f"[DEBUG] Processing multiple agents: {agent_name}", level=logging.DEBUG)
455
+ # Multiple agents case
456
+ agent_list = [agent.strip() for agent in agent_name.split(",")]
457
+
458
+ # Categorize agents
459
+ standard_agents = [agent for agent in agent_list if _is_standard_agent(agent)]
460
+ template_agents = [agent for agent in agent_list if _is_template_agent(agent)]
461
+ custom_agents = [agent for agent in agent_list if not _is_standard_agent(agent) and not _is_template_agent(agent)]
462
+
463
+ logger.log_message(f"[DEBUG] Agent categorization - standard: {standard_agents}, template: {template_agents}, custom: {custom_agents}", level=logging.DEBUG)
464
+
465
+ if custom_agents:
466
+ # If any custom agents, use session AI system for all
467
+ ai_system = session_state["ai_system"]
468
+ session_lm = get_session_lm(session_state)
469
+ logger.log_message(f"[DEBUG] Using custom agent execution path", level=logging.DEBUG)
470
+ with dspy.context(lm=session_lm):
471
+ response = await asyncio.wait_for(
472
+ _execute_custom_agents(ai_system, agent_list, enhanced_query),
473
+ timeout=REQUEST_TIMEOUT_SECONDS
474
+ )
475
+ logger.log_message(f"[DEBUG] Custom agents response type: {type(response)}, keys: {list(response.keys()) if isinstance(response, dict) else 'not a dict'}", level=logging.DEBUG)
476
+ else:
477
+ # All standard/template agents - use auto_analyst_ind which loads from DB
478
+ user_id = session_state.get("user_id")
479
+ logger.log_message(f"[DEBUG] Using auto_analyst_ind for multiple standard/template agents with user_id: {user_id}", level=logging.DEBUG)
480
+
481
+ # Create database session for agent loading
482
+ from src.db.init_db import session_factory
483
+ db_session = session_factory()
484
+ try:
485
+ # auto_analyst_ind will load all agents from database
486
+ logger.log_message(f"[DEBUG] Creating auto_analyst_ind instance", level=logging.DEBUG)
487
+ agent = auto_analyst_ind(agents=[], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session)
488
+ session_lm = get_session_lm(session_state)
489
+ logger.log_message(f"[DEBUG] About to call agent.forward with query and agent list", level=logging.DEBUG)
490
+ with dspy.context(lm=session_lm):
491
+ response = await asyncio.wait_for(
492
+ agent.forward(enhanced_query, ",".join(agent_list)),
493
+ timeout=REQUEST_TIMEOUT_SECONDS
494
+ )
495
+ logger.log_message(f"[DEBUG] auto_analyst_ind response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG)
496
+ finally:
497
+ db_session.close()
498
+ else:
499
+ logger.log_message(f"[DEBUG] Processing single agent: {agent_name}", level=logging.DEBUG)
500
+ # Single agent case
501
+ if _is_standard_agent(agent_name) or _is_template_agent(agent_name):
502
+ # Standard or template agent - use auto_analyst_ind which loads from DB
503
+ user_id = session_state.get("user_id")
504
+ logger.log_message(f"[DEBUG] Using auto_analyst_ind for single standard/template agent '{agent_name}' with user_id: {user_id}", level=logging.DEBUG)
505
+
506
+ # Create database session for agent loading
507
+ from src.db.init_db import session_factory
508
+ db_session = session_factory()
509
+ try:
510
+ # auto_analyst_ind will load all agents from database
511
+ logger.log_message(f"[DEBUG] Creating auto_analyst_ind instance for single agent", level=logging.DEBUG)
512
+ agent = auto_analyst_ind(agents=[], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session)
513
+ session_lm = get_session_lm(session_state)
514
+ logger.log_message(f"[DEBUG] About to call agent.forward for single agent '{agent_name}'", level=logging.DEBUG)
515
+ with dspy.context(lm=session_lm):
516
+ response = await asyncio.wait_for(
517
+ agent.forward(enhanced_query, agent_name),
518
+ timeout=REQUEST_TIMEOUT_SECONDS
519
+ )
520
+ logger.log_message(f"[DEBUG] Single agent response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG)
521
+ finally:
522
+ db_session.close()
523
+ else:
524
+ # Custom agent - use session AI system
525
+ ai_system = session_state["ai_system"]
526
+ session_lm = get_session_lm(session_state)
527
+ logger.log_message(f"[DEBUG] Using custom agent execution for '{agent_name}'", level=logging.DEBUG)
528
+ with dspy.context(lm=session_lm):
529
+ response = await asyncio.wait_for(
530
+ _execute_custom_agents(ai_system, [agent_name], enhanced_query),
531
+ timeout=REQUEST_TIMEOUT_SECONDS
532
+ )
533
+ logger.log_message(f"[DEBUG] Custom single agent response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG)
534
+
535
+ logger.log_message(f"[DEBUG] About to format response to markdown. Response type: {type(response)}", level=logging.DEBUG)
536
+ formatted_response = format_response_to_markdown(response, agent_name, session_state["current_df"])
537
+ logger.log_message(f"[DEBUG] Formatted response type: {type(formatted_response)}, length: {len(str(formatted_response))}", level=logging.DEBUG)
538
+
539
+ if formatted_response == RESPONSE_ERROR_INVALID_QUERY:
540
+ logger.log_message(f"[DEBUG] Response was invalid query error", level=logging.DEBUG)
541
+ return {
542
+ "agent_name": agent_name,
543
+ "query": request.query,
544
+ "response": formatted_response,
545
+ "session_id": session_id
546
+ }
547
+
548
+ # Track usage statistics
549
+ if session_state.get("user_id"):
550
+ logger.log_message(f"[DEBUG] Tracking model usage", level=logging.DEBUG)
551
+ _track_model_usage(
552
+ session_state=session_state,
553
+ enhanced_query=enhanced_query,
554
+ response=response,
555
+ processing_time_ms=int((time.time() - start_time) * 1000)
556
+ )
557
+
558
+ logger.log_message(f"[DEBUG] chat_with_agent completed successfully", level=logging.DEBUG)
559
+ return {
560
+ "agent_name": agent_name,
561
+ "query": request.query, # Return original query without context
562
+ "response": formatted_response,
563
+ "session_id": session_id
564
+ }
565
+ except HTTPException:
566
+ # Re-raise HTTP exceptions to preserve status codes
567
+ logger.log_message(f"[DEBUG] HTTPException caught and re-raised", level=logging.DEBUG)
568
+ raise
569
+ except asyncio.TimeoutError:
570
+ logger.log_message(f"[ERROR] Timeout error in chat_with_agent", level=logging.ERROR)
571
+ raise HTTPException(status_code=504, detail="Request timed out. Please try a simpler query.")
572
+ except Exception as e:
573
+ logger.log_message(f"[ERROR] Unexpected error in chat_with_agent: {str(e)}", level=logging.ERROR)
574
+ logger.log_message(f"[ERROR] Exception type: {type(e)}, traceback: {str(e)}", level=logging.ERROR)
575
+ import traceback
576
+ logger.log_message(f"[ERROR] Full traceback: {traceback.format_exc()}", level=logging.ERROR)
577
+ raise HTTPException(status_code=500, detail="An unexpected error occurred. Please try again later.")
578
+
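For reference, a hypothetical client call against this endpoint; the body shape assumes `QueryRequest` carries a single `query` field, and the host, port, and IDs are placeholders:

```python
# Hypothetical invocation of POST /chat/{agent_name} with session and user context.
import requests

resp = requests.post(
    "http://localhost:8000/chat/data_viz_agent",
    params={"user_id": 1, "chat_id": 42},     # read by _update_session_from_query_params
    headers={"X-Session-ID": "my-session"},   # matches the X_SESSION_ID header defined above
    json={"query": "plot price vs area"},
    timeout=60,
)
print(resp.json()["response"])
```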
579
+
580
+ @app.post("/chat", response_model=dict)
581
+ async def chat_with_all(
582
+ request: QueryRequest,
583
+ request_obj: Request,
584
+ session_id: str = Depends(get_session_id_dependency)
585
+ ):
586
+ session_state = app.state.get_session_state(session_id)
587
+
588
+ try:
589
+ # Extract and validate query parameters
590
+ _update_session_from_query_params(request_obj, session_state)
591
+
592
+ # Validate dataset
593
+ if session_state["current_df"] is None:
594
+ raise HTTPException(status_code=400, detail=RESPONSE_ERROR_NO_DATASET)
595
+
596
+ if session_state["ai_system"] is None:
597
+ raise HTTPException(status_code=500, detail="AI system not properly initialized.")
598
+
599
+ # Get session-specific model
600
+ session_lm = get_session_lm(session_state)
601
+
602
+ # Create streaming response
603
+ return StreamingResponse(
604
+ _generate_streaming_responses(session_state, request.query, session_lm),
605
+ media_type='text/event-stream',
606
+ headers={
607
+ 'Cache-Control': 'no-cache',
608
+ 'Connection': 'keep-alive',
609
+ 'Content-Type': 'text/event-stream',
610
+ 'Access-Control-Allow-Origin': '*',
611
+ 'X-Accel-Buffering': 'no'
612
+ }
613
+ )
614
+ except HTTPException:
615
+ # Re-raise HTTP exceptions to preserve status codes
616
+ raise
617
+ except Exception as e:
618
+ raise HTTPException(status_code=500, detail="An unexpected error occurred. Please try again later.")
619
+
620
+
621
+ # Helper functions to reduce duplication and improve modularity
622
+ def _update_session_from_query_params(request_obj: Request, session_state: dict):
623
+ """Extract and validate chat_id and user_id from query parameters"""
624
+ # Check for chat_id in query parameters
625
+ if "chat_id" in request_obj.query_params:
626
+ try:
627
+ chat_id_param = int(request_obj.query_params.get("chat_id"))
628
+ # Update session state with this chat ID
629
+ session_state["chat_id"] = chat_id_param
630
+ except (ValueError, TypeError):
631
+ logger.log_message("Invalid chat_id parameter", level=logging.WARNING)
632
+ # Continue without updating chat_id
633
+
634
+ # Check for user_id in query parameters
635
+ if "user_id" in request_obj.query_params:
636
+ try:
637
+ user_id = int(request_obj.query_params["user_id"])
638
+ session_state["user_id"] = user_id
639
+ except (ValueError, TypeError):
640
+ raise HTTPException(
641
+ status_code=400,
642
+ detail="Invalid user_id in query params. Please provide a valid integer."
643
+ )
644
+
645
+
646
+ def _validate_agent_name(agent_name: str, session_state: dict = None):
647
+ """Validate that the agent name(s) are available"""
648
+ logger.log_message(f"[DEBUG] Validating agent name: '{agent_name}'", level=logging.DEBUG)
649
+
650
+ if "," in agent_name:
651
+ # Multiple agents
652
+ agent_list = [agent.strip() for agent in agent_name.split(",")]
653
+ logger.log_message(f"[DEBUG] Multiple agents detected: {agent_list}", level=logging.DEBUG)
654
+ for agent in agent_list:
655
+ is_available = _is_agent_available(agent, session_state)
656
+ logger.log_message(f"[DEBUG] Agent '{agent}' availability: {is_available}", level=logging.DEBUG)
657
+ if not is_available:
658
+ available_agents = _get_available_agents_list(session_state)
659
+ logger.log_message(f"[DEBUG] Agent '{agent}' not found. Available: {available_agents}", level=logging.DEBUG)
660
+ raise HTTPException(
661
+ status_code=400,
662
+ detail=f"Agent '{agent}' not found. Available agents: {available_agents}"
663
+ )
664
+ else:
665
+ # Single agent
666
+ is_available = _is_agent_available(agent_name, session_state)
667
+ logger.log_message(f"[DEBUG] Single agent '{agent_name}' availability: {is_available}", level=logging.DEBUG)
668
+ if not is_available:
669
+ available_agents = _get_available_agents_list(session_state)
670
+ logger.log_message(f"[DEBUG] Agent '{agent_name}' not found. Available: {available_agents}", level=logging.DEBUG)
671
+ raise HTTPException(
672
+ status_code=400,
673
+ detail=f"Agent '{agent_name}' not found. Available agents: {available_agents}"
674
+ )
675
+
676
+ logger.log_message(f"[DEBUG] Agent validation passed for: '{agent_name}'", level=logging.DEBUG)
677
+
678
+ def _is_agent_available(agent_name: str, session_state: dict = None) -> bool:
679
+ """Check if an agent is available (standard, template, or custom)"""
680
+ # Check if it's a standard agent
681
+ if _is_standard_agent(agent_name):
682
+ return True
683
+
684
+ # Check if it's a template agent
685
+ if _is_template_agent(agent_name):
686
+ return True
687
+
688
+ # Check if it's a custom agent in session
689
+ if session_state and "ai_system" in session_state:
690
+ ai_system = session_state["ai_system"]
691
+ if hasattr(ai_system, 'agents') and agent_name in ai_system.agents:
692
+ return True
693
+
694
+ return False
695
+
696
+ def _get_available_agents_list(session_state: dict = None) -> list:
697
+ """Get list of all available agents from database"""
698
+ from src.db.init_db import session_factory
699
+ from src.agents.agents import load_all_available_templates_from_db
700
+
701
+ # Core agents (always available)
702
+ available = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"]
703
+
704
+ # Add template agents from database
705
+ db_session = session_factory()
706
+ try:
707
+ template_agents_dict = load_all_available_templates_from_db(db_session)
708
+ # template_agents_dict is a dict with template_name as keys
709
+ template_names = [template_name for template_name in template_agents_dict.keys()
710
+ if template_name not in available and template_name != 'basic_qa_agent']
711
+ available.extend(template_names)
712
+ except Exception as e:
713
+ logger.log_message(f"Error loading template agents: {str(e)}", level=logging.ERROR)
714
+ finally:
715
+ db_session.close()
716
+
717
+ return available
718
+
719
+ def _is_standard_agent(agent_name: str) -> bool:
720
+ """Check if agent is one of the 4 core standard agents"""
721
+ standard_agents = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"]
722
+ return agent_name in standard_agents
723
+
724
+ def _is_template_agent(agent_name: str) -> bool:
725
+ """Check if agent is a template agent"""
726
+ try:
727
+ from src.db.init_db import session_factory
728
+ from src.db.schemas.models import AgentTemplate
729
+
730
+ db_session = session_factory()
731
+ try:
732
+ template = db_session.query(AgentTemplate).filter(
733
+ AgentTemplate.template_name == agent_name,
734
+ AgentTemplate.is_active == True
735
+ ).first()
736
+ return template is not None
737
+ finally:
738
+ db_session.close()
739
+ except Exception as e:
740
+ logger.log_message(f"Error checking if {agent_name} is template: {str(e)}", level=logging.ERROR)
741
+ return False
742
+
743
+ async def _execute_custom_agents(ai_system, agent_names: list, query: str):
744
+ """Execute custom agents using the session's AI system"""
745
+ try:
746
+ # For custom agents, we need to use the AI system's execute_agent method
747
+
749
+ if len(agent_names) == 1:
750
+ # Single custom agent
751
+ agent_name = agent_names[0]
752
+ # Prepare inputs for the custom agent (similar to standard agents like data_viz_agent)
753
+ dict_ = {}
754
+ dict_['dataset'] = ai_system.dataset.retrieve(query)[0].text
755
+ dict_['styling_index'] = ai_system.styling_index.retrieve(query)[0].text
756
+ dict_['goal'] = query
757
+ dict_['Agent_desc'] = str(ai_system.agent_desc)
758
+
759
+ # Get input fields for this agent
760
+ if agent_name in ai_system.agent_inputs:
761
+ inputs = {x: dict_[x] for x in ai_system.agent_inputs[agent_name] if x in dict_}
762
+
763
+ # Execute the custom agent
764
+ agent_name_result, result_dict = await ai_system.agents[agent_name](**inputs)
765
+ return {agent_name_result: result_dict}
766
+ else:
767
+ logger.log_message(f"Agent '{agent_name}' not found in ai_system.agent_inputs", level=logging.ERROR)
768
+ return {"error": f"Agent '{agent_name}' input configuration not found"}
769
+ else:
770
+ # Multiple agents - execute sequentially
771
+ results = {}
772
+ for agent_name in agent_names:
773
+ single_result = await _execute_custom_agents(ai_system, [agent_name], query)
774
+ results.update(single_result)
775
+ return results
776
+
777
+ except Exception as e:
778
+ logger.log_message(f"Error in _execute_custom_agents: {str(e)}", level=logging.ERROR)
779
+ return {"error": f"Error executing custom agents: {str(e)}"}
780
+
781
+ def _prepare_query_with_context(query: str, session_state: dict) -> str:
782
+ """Prepare the query with chat context from previous messages"""
783
+ chat_id = session_state.get("chat_id")
784
+ if not chat_id:
785
+ return query
786
+
787
+ # Get chat manager from app state
788
+ chat_manager = app.state._session_manager.chat_manager
789
+ # Get recent messages
790
+ recent_messages = chat_manager.get_recent_chat_history(chat_id, limit=MAX_RECENT_MESSAGES)
791
+ # Extract response history
792
+ chat_context = chat_manager.extract_response_history(recent_messages)
793
+
794
+ # Append context to the query if available
795
+ if chat_context:
796
+ return f"### Current Query:\n{query}\n\n{chat_context}"
797
+ return query
798
+
799
+
800
+ def _track_model_usage(session_state: dict, enhanced_query: str, response, processing_time_ms: int):
801
+ """Track model usage statistics in the database"""
802
+ try:
803
+ ai_manager = app.state.get_ai_manager()
804
+
805
+ # Get model configuration
806
+ model_config = session_state.get("model_config", DEFAULT_MODEL_CONFIG)
807
+ model_name = model_config.get("model", DEFAULT_MODEL_CONFIG["model"])
808
+ provider = ai_manager.get_provider_for_model(model_name)
809
+
810
+ # Calculate token usage
811
+ try:
812
+ # Try exact tokenization
813
+ prompt_tokens = len(ai_manager.tokenizer.encode(enhanced_query))
814
+ completion_tokens = len(ai_manager.tokenizer.encode(str(response)))
815
+ total_tokens = prompt_tokens + completion_tokens
816
+ except Exception as token_error:
817
+ # Fall back to estimation
818
+ logger.log_message(f"Tokenization error: {str(token_error)}", level=logging.WARNING)
819
+ prompt_words = len(enhanced_query.split())
820
+ completion_words = len(str(response).split())
821
+ prompt_tokens = int(prompt_words * DEFAULT_TOKEN_RATIO)
822
+ completion_tokens = int(completion_words * DEFAULT_TOKEN_RATIO)
823
+ total_tokens = prompt_tokens + completion_tokens
824
+
825
+ # Calculate cost
826
+ cost = ai_manager.calculate_cost(model_name, prompt_tokens, completion_tokens)
827
+
828
+ # Save usage to database
829
+ ai_manager.save_usage_to_db(
830
+ user_id=session_state.get("user_id"),
831
+ chat_id=session_state.get("chat_id"),
832
+ model_name=model_name,
833
+ provider=provider,
834
+ prompt_tokens=int(prompt_tokens),
835
+ completion_tokens=int(completion_tokens),
836
+ total_tokens=int(total_tokens),
837
+ query_size=len(enhanced_query),
838
+ response_size=len(str(response)),
839
+ cost=round(cost, 7),
840
+ request_time_ms=processing_time_ms,
841
+ is_streaming=False
842
+ )
843
+ except Exception as e:
844
+ # Log but don't fail the request if usage tracking fails
845
+ logger.log_message(f"Failed to track model usage: {str(e)}", level=logging.ERROR)
846
+
847
+
848
+ async def _generate_streaming_responses(session_state: dict, query: str, session_lm):
849
+ """Generate streaming responses for chat_with_all endpoint"""
850
+ overall_start_time = time.time()
851
+ total_response = ""
852
+ total_inputs = ""
853
+ usage_records = []
854
+
855
+ # Add chat context from previous messages
856
+ enhanced_query = _prepare_query_with_context(query, session_state)
857
+
858
+ # try:
859
+ # Get the plan - planner is now async, so we need to await it
860
+ plan_response = await session_state["ai_system"].get_plan(enhanced_query)
861
+
862
+ plan_description = format_response_to_markdown(
863
+ {"analytical_planner": plan_response},
864
+ dataframe=session_state["current_df"]
865
+ )
866
+
867
+ # Check if plan is valid
868
+ if plan_description == RESPONSE_ERROR_INVALID_QUERY:
869
+ yield json.dumps({
870
+ "agent": "Analytical Planner",
871
+ "content": plan_description,
872
+ "status": "error"
873
+ }) + "\n"
874
+ return
875
+
876
+ yield json.dumps({
877
+ "agent": "Analytical Planner",
878
+ "content": plan_description,
879
+ "status": "success" if plan_description else "error"
880
+ }) + "\n"
881
+
882
+ # Track planner usage
883
+ if session_state.get("user_id"):
884
+ planner_tokens = _estimate_tokens(ai_manager=app.state.ai_manager,
885
+ input_text=enhanced_query,
886
+ output_text=plan_description)
887
+
888
+ usage_records.append(_create_usage_record(
889
+ session_state=session_state,
890
+ model_name=session_state.get("model_config", DEFAULT_MODEL_CONFIG)["model"],
891
+ prompt_tokens=planner_tokens["prompt"],
892
+ completion_tokens=planner_tokens["completion"],
893
+ query_size=len(enhanced_query),
894
+ response_size=len(plan_description),
895
+ processing_time_ms=int((time.time() - overall_start_time) * 1000),
896
+ is_streaming=False
897
+ ))
898
+
899
+ logger.log_message(f"Plan response: {plan_response}", level=logging.INFO)
900
+ logger.log_message(f"Plan response type: {type(plan_response)}", level=logging.INFO)
901
+
911
+ # Execute the plan with well-managed concurrency
912
+ with dspy.context(lm=session_lm):
+
915
+ async for agent_name, inputs, response in session_state["ai_system"].execute_plan(enhanced_query, plan_response):
916
+
917
+ if agent_name == "plan_not_found":
918
+ yield json.dumps({
919
+ "agent": "Analytical Planner",
920
+ "content": "**No plan found**\n\nPlease try again with a different query or try using a different model.",
921
+ "status": "error"
922
+ }) + "\n"
923
+ return
924
+
925
+ if agent_name == "plan_not_formated_correctly":
926
+ yield json.dumps({
927
+ "agent": "Analytical Planner",
928
+ "content": "**Something went wrong while formatting the plan. Please retry the query.**",
929
+ "status": "error"
930
+ }) + "\n"
931
+ return
932
+
933
+
934
+ formatted_response = format_response_to_markdown(
+ {agent_name: response},
+ dataframe=session_state["current_df"]
+ )
+
+ # Handle agent errors before emitting a success chunk
+ if isinstance(response, dict) and "error" in response:
+ yield json.dumps({
+ "agent": agent_name,
+ "content": f"**Error in {agent_name}**: {response['error']}",
+ "status": "error"
+ }) + "\n"
+ continue  # Continue with the next agent instead of returning
+
+ if formatted_response == RESPONSE_ERROR_INVALID_QUERY:
+ yield json.dumps({
+ "agent": agent_name,
+ "content": formatted_response,
+ "status": "error"
+ }) + "\n"
+ continue  # Continue with the next agent instead of returning
+
+ # Send the response chunk
+ yield json.dumps({
+ "agent": agent_name.split("__")[0] if "__" in agent_name else agent_name,
+ "content": formatted_response,
+ "status": "success" if response else "error"
+ }) + "\n"
+
967
+ # Track agent usage for future batch DB write
968
+ if session_state.get("user_id"):
969
+ agent_tokens = _estimate_tokens(
970
+ ai_manager=app.state.ai_manager,
971
+ input_text=str(inputs),
972
+ output_text=str(response)
973
+ )
974
+
975
+ # Get appropriate model name for code combiner
976
+ if "code_combiner_agent" in agent_name and "__" in agent_name:
977
+ provider = agent_name.split("__")[1]
978
+ model_name = _get_model_name_for_provider(provider)
979
+ else:
980
+ model_name = session_state.get("model_config", DEFAULT_MODEL_CONFIG)["model"]
981
+
982
+ usage_records.append(_create_usage_record(
983
+ session_state=session_state,
984
+ model_name=model_name,
985
+ prompt_tokens=agent_tokens["prompt"],
986
+ completion_tokens=agent_tokens["completion"],
987
+ query_size=len(str(inputs)),
988
+ response_size=len(str(response)),
989
+ processing_time_ms=int((time.time() - overall_start_time) * 1000),
990
+ is_streaming=True
991
+ ))
992
+
1019
+ def _estimate_tokens(ai_manager, input_text: str, output_text: str) -> dict:
1020
+ """Estimate token counts, with fallback for tokenization errors"""
1021
+ try:
1022
+ # Try exact tokenization
1023
+ prompt_tokens = len(ai_manager.tokenizer.encode(input_text))
1024
+ completion_tokens = len(ai_manager.tokenizer.encode(output_text))
1025
+ except Exception:
1026
+ # Fall back to estimation
1027
+ prompt_words = len(input_text.split())
1028
+ completion_words = len(output_text.split())
1029
+ prompt_tokens = int(prompt_words * DEFAULT_TOKEN_RATIO)
1030
+ completion_tokens = int(completion_words * DEFAULT_TOKEN_RATIO)
1031
+
1032
+ return {
1033
+ "prompt": prompt_tokens,
1034
+ "completion": completion_tokens,
1035
+ "total": prompt_tokens + completion_tokens
1036
+ }
1037
+
1038
+
1039
+ def _create_usage_record(session_state: dict, model_name: str, prompt_tokens: int,
1040
+ completion_tokens: int, query_size: int, response_size: int,
1041
+ processing_time_ms: int, is_streaming: bool) -> dict:
1042
+ """Create a usage record for the database"""
1043
+ ai_manager = app.state.get_ai_manager()
1044
+ provider = ai_manager.get_provider_for_model(model_name)
1045
+ cost = ai_manager.calculate_cost(model_name, prompt_tokens, completion_tokens)
1046
+
1047
+ return {
1048
+ "user_id": session_state.get("user_id"),
1049
+ "chat_id": session_state.get("chat_id"),
1050
+ "model_name": model_name,
1051
+ "provider": provider,
1052
+ "prompt_tokens": int(prompt_tokens),
1053
+ "completion_tokens": int(completion_tokens),
1054
+ "total_tokens": int(prompt_tokens + completion_tokens),
1055
+ "query_size": query_size,
1056
+ "response_size": response_size,
1057
+ "cost": round(cost, 7),
1058
+ "request_time_ms": processing_time_ms,
1059
+ "is_streaming": is_streaming
1060
+ }
1061
+
1062
+
1063
+ def _get_model_name_for_provider(provider: str) -> str:
1064
+ """Get the model name for a provider"""
1065
+ provider_model_map = {
1066
+ "openai": "o3-mini",
1067
+ "anthropic": "claude-3-7-sonnet-latest",
1068
+ "gemini": "gemini-2.5-pro-preview-03-25"
1069
+ }
1070
+ return provider_model_map.get(provider, "o3-mini")
1071
+
1072
+
1073
+
1074
+ # Add an endpoint to list available agents
1075
+ @app.get("/agents", response_model=dict)
1076
+ async def list_agents(request: Request, session_id: str = Depends(get_session_id_dependency)):
1077
+ """Get all available agents (standard, template, and custom)"""
1078
+ session_state = app.state.get_session_state(session_id)
1079
+
1080
+ try:
1081
+ # Get all available agents from database and session
1082
+ available_agents_list = _get_available_agents_list(session_state)
1083
+
1084
+ # Categorize agents
1085
+ standard_agents = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"]
1086
+
1087
+ # Get template agents from database
1088
+ from src.db.init_db import session_factory
1089
+ from src.agents.agents import load_all_available_templates_from_db
1090
+
1091
+ db_session = session_factory()
1092
+ try:
1093
+ template_agents_dict = load_all_available_templates_from_db(db_session)
1094
+ # template_agents_dict is a dict with template_name as keys
1095
+ template_agents = [template_name for template_name in template_agents_dict.keys()
1096
+ if template_name not in standard_agents and template_name != 'basic_qa_agent']
1097
+ except Exception as e:
1098
+ logger.log_message(f"Error loading template agents in /agents endpoint: {str(e)}", level=logging.ERROR)
1099
+ template_agents = []
1100
+ finally:
1101
+ db_session.close()
1102
+
1103
+ # Get custom agents from session
1104
+ custom_agents = []
1105
+ if session_state and "ai_system" in session_state:
1106
+ ai_system = session_state["ai_system"]
1107
+ if hasattr(ai_system, 'agents'):
1108
+ custom_agents = [agent for agent in available_agents_list
1109
+ if agent not in standard_agents and agent not in template_agents]
1110
+
1111
+ # Ensure template agents are in the available list
1112
+ for template_agent in template_agents:
1113
+ if template_agent not in available_agents_list:
1114
+ available_agents_list.append(template_agent)
1115
+
1116
+ return {
1117
+ "available_agents": available_agents_list,
1118
+ "standard_agents": standard_agents,
1119
+ "template_agents": template_agents,
1120
+ "custom_agents": custom_agents
1121
+ }
1122
+ except Exception as e:
1123
+ logger.log_message(f"Error getting agents list: {str(e)}", level=logging.ERROR)
1124
+ raise HTTPException(status_code=500, detail=f"Error getting agents list: {str(e)}")
1125
+
1126
+ @app.get("/health", response_model=dict)
1127
+ async def health():
1128
+ return {"message": "API is healthy and running"}
1129
+
1130
+ @app.get("/")
1131
+ async def index():
1132
+ return {
1133
+ "title": "Welcome to the AI Analytics API",
1134
+ "message": "Explore our API for advanced analytics and visualization tools designed to empower your data-driven decisions.",
1135
+ "description": "Utilize our powerful agents and models to gain insights from your data effortlessly.",
1136
+ "colors": {
1137
+ "primary": "#007bff",
1138
+ "secondary": "#6c757d",
1139
+ "success": "#28a745",
1140
+ "danger": "#dc3545",
1141
+ },
1142
+ "features": [
1143
+ "Real-time data processing",
1144
+ "Customizable visualizations",
1145
+ "Seamless integration with various data sources",
1146
+ "User-friendly interface for easy navigation",
1147
+ "Custom Analytics",
1148
+ ],
1149
+ }
1150
+
1151
+ @app.post("/chat_history_name")
1152
+ async def chat_history_name(request: dict, session_id: str = Depends(get_session_id_dependency)):
1153
+ query = request.get("query")
1154
+ name = None
1155
+
1156
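+ # Note: chat titles are generated with a fixed lightweight model, independent of the session's configured LM.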
+ lm = dspy.LM(model="gpt-4o-mini", max_tokens=300, temperature=0.5)
1157
+
1158
+ with dspy.context(lm=lm):
1159
+ name = app.state.get_chat_history_name_agent()(query=str(query))
1160
+
1161
+ return {"name": name.name if name else "New Chat"}
1162
+
1163
+ @app.post("/deep_analysis_streaming")
1164
+ async def deep_analysis_streaming(
1165
+ request: DeepAnalysisRequest,
1166
+ request_obj: Request,
1167
+ session_id: str = Depends(get_session_id_dependency)
1168
+ ):
1169
+ """Perform streaming deep analysis with real-time updates"""
1170
+ session_state = app.state.get_session_state(session_id)
1171
+
1172
+ try:
1173
+ # Extract and validate query parameters
1174
+ _update_session_from_query_params(request_obj, session_state)
1175
+
1176
+ # Validate dataset
1177
+ if session_state["current_df"] is None:
1178
+ raise HTTPException(status_code=400, detail=RESPONSE_ERROR_NO_DATASET)
1179
+
1180
+ # Get user_id from session state (if available)
1181
+ user_id = session_state.get("user_id")
1182
+
1183
+ # Generate a UUID for this report
1184
+ import uuid
1185
+ report_uuid = str(uuid.uuid4())
1186
+
1187
+ # Create initial pending report in the database
1188
+ try:
1189
+ from src.db.init_db import session_factory
1190
+ from src.db.schemas.models import DeepAnalysisReport
1191
+
1192
+ db_session = session_factory()
1193
+
1194
+ try:
1195
+ # Create a pending report entry
1196
+ new_report = DeepAnalysisReport(
1197
+ report_uuid=report_uuid,
1198
+ user_id=user_id,
1199
+ goal=request.goal,
1200
+ status="pending",
1201
+ start_time=datetime.now(UTC),
1202
+ progress_percentage=0
1203
+ )
1204
+
1205
+ db_session.add(new_report)
1206
+ db_session.commit()
1207
+ db_session.refresh(new_report)
1208
+
1209
+ # Store the report ID in session state for later updates
1210
+ session_state["current_deep_analysis_id"] = new_report.report_id
1211
+ session_state["current_deep_analysis_uuid"] = report_uuid
1212
+
1213
+ except Exception as e:
1214
+ logger.log_message(f"Error creating initial deep analysis report: {str(e)}", level=logging.ERROR)
1215
+ # Continue even if DB storage fails
1216
+ finally:
1217
+ db_session.close()
1218
+
1219
+ except Exception as e:
1220
+ logger.log_message(f"Database operation failed: {str(e)}", level=logging.ERROR)
1221
+ # Continue even if DB operation fails
1222
+
1223
+ # Get session-specific model
1224
+ # session_lm = get_session_lm(session_state)
1225
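+ # Note: deep analysis currently pins a specific Anthropic model rather than using the session-configured LM above.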
+ session_lm = dspy.LM(model="anthropic/claude-sonnet-4-20250514", max_tokens=7000, temperature=0.5)
1226
+
1227
+ return StreamingResponse(
1228
+ _generate_deep_analysis_stream(session_state, request.goal, session_lm, session_id),
1229
+ media_type='text/event-stream',
1230
+ headers={
1231
+ 'Cache-Control': 'no-cache',
1232
+ 'Connection': 'keep-alive',
1233
+ 'Content-Type': 'text/event-stream',
1234
+ 'Access-Control-Allow-Origin': '*',
1235
+ 'X-Accel-Buffering': 'no'
1236
+ }
1237
+ )
1238
+
1239
+ except HTTPException:
1240
+ raise
1241
+ except Exception as e:
1242
+ logger.log_message(f"Streaming deep analysis failed: {str(e)}", level=logging.ERROR)
1243
+ raise HTTPException(status_code=500, detail=f"Streaming deep analysis failed: {str(e)}")
1244
+
1245
+ async def _generate_deep_analysis_stream(session_state: dict, goal: str, session_lm, session_id: str):
1246
+ """Generate streaming responses for deep analysis"""
1247
+ # Track the start time for duration calculation
1248
+ start_time = datetime.now(UTC)
1249
+
1250
+ try:
1251
+ # Get dataset info
1252
+ df = session_state["current_df"]
1253
+ dtypes_info = pd.DataFrame({
1254
+ 'Column': df.columns,
1255
+ 'Data Type': df.dtypes.astype(str)
1256
+ }).to_markdown()
1257
+ dataset_info = f"Sample Data:\n{df.head(2).to_markdown()}\n\nData Types:\n{dtypes_info}"
1258
+
1259
+ # Get report info from session state
1260
+ report_id = session_state.get("current_deep_analysis_id")
1261
+ report_uuid = session_state.get("current_deep_analysis_uuid")
1262
+ user_id = session_state.get("user_id")
1263
+
1264
+ # Helper function to update report in database
1265
+ async def update_report_in_db(status, progress, step=None, content=None):
1266
+ if not report_id:
1267
+ return
1268
+
1269
+ try:
1270
+ from src.db.init_db import session_factory
1271
+ from src.db.schemas.models import DeepAnalysisReport
1272
+
1273
+ db_session = session_factory()
1274
+
1275
+ try:
1276
+ report = db_session.query(DeepAnalysisReport).filter(DeepAnalysisReport.report_id == report_id).first()
1277
+
1278
+ if report:
1279
+ report.status = status
1280
+ report.progress_percentage = progress
1281
+
1282
+ # Update step-specific fields if provided
1283
+ if step == "questions" and content:
1284
+ report.deep_questions = content
1285
+ elif step == "planning" and content:
1286
+ report.deep_plan = content
1287
+ elif step == "analysis" and content:
1288
+ # For analysis step, we get the full object with multiple fields
1289
+ if isinstance(content, dict):
1290
+ # Update fields from content if they exist
1291
+ if "deep_questions" in content and content["deep_questions"]:
1292
+ report.deep_questions = content["deep_questions"]
1293
+ if "deep_plan" in content and content["deep_plan"]:
1294
+ report.deep_plan = content["deep_plan"]
1295
+ if "code" in content and content["code"]:
1296
+ report.analysis_code = content["code"]
1297
+ if "final_conclusion" in content and content["final_conclusion"]:
1298
+ report.final_conclusion = content["final_conclusion"]
1299
+ # Also update summary from conclusion
1300
+ conclusion = content["final_conclusion"]
1301
+ conclusion = conclusion.replace("**Conclusion**", "")
1302
+ report.report_summary = conclusion[:200] + "..." if len(conclusion) > 200 else conclusion
1303
+
1304
+ # Handle JSON fields
1305
+ if "summaries" in content and content["summaries"]:
1306
+ report.summaries = json.dumps(content["summaries"])
1307
+ if "plotly_figs" in content and content["plotly_figs"]:
1308
+ report.plotly_figures = json.dumps(content["plotly_figs"])
1309
+ if "synthesis" in content and content["synthesis"]:
1310
+ report.synthesis = json.dumps(content["synthesis"])
1311
+
1312
+ # For the final step, update the HTML report
1313
+ if step == "completed":
1314
+ if content:
1315
+ report.html_report = content
1316
+ else:
1317
+ logger.log_message("No HTML content provided for completed step", level=logging.WARNING)
1318
+
1319
+ report.end_time = datetime.now(UTC)
1320
+ # Ensure start_time is timezone-aware before calculating duration
1321
+ if report.start_time.tzinfo is None:
1322
+ start_time_utc = report.start_time.replace(tzinfo=UTC)
1323
+ else:
1324
+ start_time_utc = report.start_time
1325
+ report.duration_seconds = int((report.end_time - start_time_utc).total_seconds())
1326
+
1327
+ report.updated_at = datetime.now(UTC)
1328
+ db_session.commit()
1329
+
1330
+ except Exception as e:
1331
+ db_session.rollback()
1332
+ logger.log_message(f"Error updating deep analysis report: {str(e)}", level=logging.ERROR)
1333
+ finally:
1334
+ db_session.close()
1335
+ except Exception as e:
1336
+ logger.log_message(f"Database operation failed: {str(e)}", level=logging.ERROR)
1337
+
1338
+ # Use session model for this request
1339
+ with dspy.context(lm=session_lm):
1340
+ # Send initial status
1341
+ yield json.dumps({
1342
+ "step": "initialization",
1343
+ "status": "starting",
1344
+ "message": "Initializing deep analysis...",
1345
+ "progress": 5
1346
+ }) + "\n"
1347
+
1348
+ # Update DB status to running
1349
+ await update_report_in_db("running", 5)
1350
+
1351
+ # Get deep analyzer - use the correct session_id from the session_state
1352
+ logger.log_message(f"Getting deep analyzer for session_id: {session_id}, user_id: {user_id}", level=logging.INFO)
1353
+ deep_analyzer = app.state.get_deep_analyzer(session_id)
1354
+
1355
+ # Make the dataset available globally for code execution
1356
+ globals()['df'] = df
1357
+
1358
+ # Use the new streaming method and forward all progress updates
1359
+ final_result = None
1360
+ async for update in deep_analyzer.execute_deep_analysis_streaming(
1361
+ goal=goal,
1362
+ dataset_info=dataset_info,
1363
+ session_df=df
1364
+ ):
1365
+ # Convert the update to the expected format and yield it
1366
+ if update.get("step") == "questions" and update.get("status") == "completed":
1367
+ # Update DB with questions
1368
+ await update_report_in_db("running", update.get("progress", 0), "questions", update.get("content"))
1369
+ elif update.get("step") == "planning" and update.get("status") == "completed":
1370
+ # Update DB with planning
1371
+ await update_report_in_db("running", update.get("progress", 0), "planning", update.get("content"))
1372
+ elif update.get("step") == "conclusion" and update.get("status") == "completed":
1373
+ # Store the final result for later processing
1374
+ final_result = update.get("final_result")
1375
+
1376
+ # Convert Plotly figures to JSON format for network transmission
1377
+ if final_result:
1378
+ import plotly.io
1379
+ serialized_return_dict = final_result.copy()
1380
+
1381
+ # Convert plotly_figs to JSON format
1382
+ if 'plotly_figs' in serialized_return_dict and serialized_return_dict['plotly_figs']:
1383
+ json_figs = []
1384
+ for fig_list in serialized_return_dict['plotly_figs']:
1385
+ if isinstance(fig_list, list):
1386
+ json_fig_list = []
1387
+ for fig in fig_list:
1388
+ if hasattr(fig, 'to_json'): # Check if it's a Plotly figure
1389
+ json_fig_list.append(plotly.io.to_json(fig))
1390
+ else:
1391
+ json_fig_list.append(fig) # Already JSON or other format
1392
+ json_figs.append(json_fig_list)
1393
+ else:
1394
+ # Single figure case
1395
+ if hasattr(fig_list, 'to_json'):
1396
+ json_figs.append(plotly.io.to_json(fig_list))
1397
+ else:
1398
+ json_figs.append(fig_list)
1399
+ serialized_return_dict['plotly_figs'] = json_figs
1400
+
1401
+ # Update DB with analysis results
1402
+ await update_report_in_db("running", update.get("progress", 0), "analysis", serialized_return_dict)
1403
+
1404
+ # Generate HTML report using the original final_result with Figure objects
1405
+ html_report = None
1406
+ try:
1407
+ html_report = generate_html_report(final_result)
1408
+ except Exception as e:
1409
+ logger.log_message(f"Error generating HTML report: {str(e)}", level=logging.ERROR)
1410
+ # Continue even if HTML generation fails
1411
+
1412
+ # Send the analysis results
1413
+ yield json.dumps({
1414
+ "step": "analysis",
1415
+ "status": "completed",
1416
+ "content": serialized_return_dict,
1417
+ "progress": 90
1418
+ }) + "\n"
1419
+
1420
+ # Send report generation status
1421
+ yield json.dumps({
1422
+ "step": "report",
1423
+ "status": "processing",
1424
+ "message": "Generating final report...",
1425
+ "progress": 95
1426
+ }) + "\n"
1427
+
1428
+ # Send final completion
1429
+ yield json.dumps({
1430
+ "step": "completed",
1431
+ "status": "success",
1432
+ "analysis": serialized_return_dict,
1433
+ "html_report": html_report,
1434
+ "progress": 100
1435
+ }) + "\n"
1436
+
1437
+ # Update DB with completed report (with HTML if generated)
1438
+ if html_report:
1439
+ logger.log_message(f"Saving HTML report to database, length: {len(html_report)}", level=logging.INFO)
1440
+ else:
1441
+ logger.log_message("No HTML report to save to database", level=logging.WARNING)
1442
+ await update_report_in_db("completed", 100, "completed", html_report)
1443
+ elif update.get("step") == "error":
1444
+ # Forward error directly
1445
+ yield json.dumps(update) + "\n"
1446
+ await update_report_in_db("failed", 0)
1447
+ return
1448
+ else:
1449
+ # Forward all other progress updates
1450
+ yield json.dumps(update) + "\n"
1451
+
1452
+ # If we somehow exit the loop without getting a final result, that's an error
1453
+ if not final_result:
1454
+ yield json.dumps({
1455
+ "step": "error",
1456
+ "status": "failed",
1457
+ "message": "Deep analysis completed without final result",
1458
+ "progress": 0
1459
+ }) + "\n"
1460
+ await update_report_in_db("failed", 0)
1461
+
1462
+ except Exception as e:
1463
+ logger.log_message(f"Error in deep analysis stream: {str(e)}", level=logging.ERROR)
1464
+ yield json.dumps({
1465
+ "step": "error",
1466
+ "status": "failed",
1467
+ "message": f"Deep analysis failed: {str(e)}",
1468
+ "progress": 0
1469
+ }) + "\n"
1470
+
1471
+ # Update DB with error status
1472
+ if 'update_report_in_db' in locals() and session_state.get("current_deep_analysis_id"):
1473
+ await update_report_in_db("failed", 0)
1474
+
1475
+ @app.post("/deep_analysis/download_report")
1476
+ async def download_html_report(
1477
+ request: dict,
1478
+ session_id: str = Depends(get_session_id_dependency)
1479
+ ):
1480
+ """Download HTML report from previous deep analysis"""
1481
+ try:
1482
+ analysis_data = request.get("analysis_data")
1483
+ if not analysis_data:
1484
+ raise HTTPException(status_code=400, detail="No analysis data provided")
1485
+
1486
+ # Get report UUID from request if available (for saving to DB)
1487
+ report_uuid = request.get("report_uuid")
1488
+ session_state = app.state.get_session_state(session_id)
1489
+
1490
+ # If no report_uuid in request, try to get it from session state
1491
+ if not report_uuid and session_state.get("current_deep_analysis_uuid"):
1492
+ report_uuid = session_state.get("current_deep_analysis_uuid")
1493
+
1494
+ # Convert JSON-serialized Plotly figures back to Figure objects for HTML generation
1495
+ processed_data = analysis_data.copy()
1496
+
1497
+ if 'plotly_figs' in processed_data and processed_data['plotly_figs']:
1498
+ import plotly.io
1499
+ import plotly.graph_objects as go
1500
+
1501
+ figure_objects = []
1502
+ for fig_list in processed_data['plotly_figs']:
1503
+ if isinstance(fig_list, list):
1504
+ fig_obj_list = []
1505
+ for fig_json in fig_list:
1506
+ if isinstance(fig_json, str):
1507
+ # Convert JSON string back to Figure object
1508
+ try:
1509
+ fig_obj = plotly.io.from_json(fig_json)
1510
+ fig_obj_list.append(fig_obj)
1511
+ except Exception as e:
1512
+ logger.log_message(f"Error parsing Plotly JSON: {str(e)}", level=logging.WARNING)
1513
+ continue
1514
+ elif hasattr(fig_json, 'to_html'):
1515
+ # Already a Figure object
1516
+ fig_obj_list.append(fig_json)
1517
+ figure_objects.append(fig_obj_list)
1518
+ else:
1519
+ # Single figure case
1520
+ if isinstance(fig_list, str):
1521
+ try:
1522
+ fig_obj = plotly.io.from_json(fig_list)
1523
+ figure_objects.append(fig_obj)
1524
+ except Exception as e:
1525
+ logger.log_message(f"Error parsing Plotly JSON: {str(e)}", level=logging.WARNING)
1526
+ continue
1527
+ elif hasattr(fig_list, 'to_html'):
1528
+ figure_objects.append(fig_list)
1529
+
1530
+ processed_data['plotly_figs'] = figure_objects
1531
+
1532
+ # Generate HTML report
1533
+ html_report = generate_html_report(processed_data)
1534
+
1535
+ # Save report to database if we have a UUID
1536
+ if report_uuid:
1537
+ try:
1538
+ from src.db.init_db import session_factory
1539
+ from src.db.schemas.models import DeepAnalysisReport
1540
+
1541
+ db_session = session_factory()
1542
+ try:
1543
+ # Try to find existing report by UUID
1544
+ report = db_session.query(DeepAnalysisReport).filter(DeepAnalysisReport.report_uuid == report_uuid).first()
1545
+
1546
+ if report:
1547
+ # Update existing report with HTML content
1548
+ report.html_report = html_report
1549
+ report.updated_at = datetime.now(UTC)
1550
+ db_session.commit()
1551
+ except Exception as e:
1552
+ db_session.rollback()
1553
+ finally:
1554
+ db_session.close()
1555
+ except Exception as e:
1556
+ logger.log_message(f"Database operation failed when storing HTML report: {str(e)}", level=logging.ERROR)
1557
+ # Continue even if DB storage fails
1558
+
1559
+ # Create a filename with timestamp
1560
+ timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
1561
+ filename = f"deep_analysis_report_{timestamp}.html"
1562
+
1563
+ # Return as downloadable file
1564
+ return StreamingResponse(
1565
+ iter([html_report.encode('utf-8')]),
1566
+ media_type='text/html',
1567
+ headers={
1568
+ 'Content-Disposition': f'attachment; filename="{filename}"',
1569
+ 'Content-Type': 'text/html; charset=utf-8'
1570
+ }
1571
+ )
1572
+
1573
+ except Exception as e:
1574
+ logger.log_message(f"Failed to generate HTML report: {str(e)}", level=logging.ERROR)
1575
+ raise HTTPException(status_code=500, detail=f"Failed to generate report: {str(e)}")
1576
+
1577
+
1578
+ # In the section where routers are included, add the session_router
1579
+ app.include_router(chat_router)
1580
+ app.include_router(analytics_router)
1581
+ app.include_router(code_router)
1582
+ app.include_router(session_router)
1583
+ app.include_router(feedback_router)
1584
+ app.include_router(deep_analysis_router)
1585
+ app.include_router(templates_router)
1586
+
1587
+ if __name__ == "__main__":
1588
+ port = int(os.environ.get("PORT", 8000))
1589
+ uvicorn.run(app, host="0.0.0.0", port=port)
docs/README.md ADDED
@@ -0,0 +1,251 @@
 
1
+ # Auto-Analyst Backend Documentation
2
+
3
+ This directory contains comprehensive documentation for the Auto-Analyst backend - a sophisticated multi-agent AI platform for data analysis built with FastAPI, DSPy, and modern Python technologies.
4
+
5
+ ## 📁 Documentation Structure
6
+
7
+ ### **🏗️ Architecture** (`/architecture/`)
8
+ - **[System Architecture](./architecture/architecture.md)** - Comprehensive overview of backend system design, components, and data flow patterns
9
+
10
+ ### **🚀 Development** (`/development/`)
11
+ - **[Development Workflow](./development/development_workflow.md)** - Complete development guide with patterns, best practices, and code organization principles
12
+
13
+ ### **🔧 System** (`/system/`)
14
+ - **[Database Schema](./system/database-schema.md)** - Complete database schema with all tables, relationships, and performance optimization
15
+ - **[Shared DataFrame System](./system/shared_dataframe.md)** - Inter-agent data sharing and session management
16
+
17
+ ### **🌐 API** (`/api/`)
18
+ - **[API Endpoints Overview](./api/README.md)** - Main API reference hub
19
+ - **[Route Documentation](./api/routes/)** - Detailed endpoint documentation:
20
+ - **[Core Routes](./api/routes/session.md)** - File uploads, sessions, authentication
21
+ - **[Chat Routes](./api/routes/chats.md)** - Chat and messaging endpoints
22
+ - **[Code Routes](./api/routes/code.md)** - Code execution and processing
23
+ - **[Analytics Routes](./api/routes/analytics.md)** - Usage analytics and monitoring
24
+ - **[Deep Analysis Routes](./api/routes/deep_analysis.md)** - Multi-agent analysis system
25
+ - **[Template Routes](./api/routes/templates.md)** - Agent template management
26
+ - **[Feedback Routes](./api/routes/feedback.md)** - User feedback and rating system
27
+
28
+ ### **🐛 Troubleshooting** (`/troubleshooting/`)
29
+ - **[Troubleshooting Guide](./troubleshooting/troubleshooting.md)** - Common issues, debugging tools, and solutions
30
+
31
+ ## 🎯 Backend Overview
32
+
33
+ ### **Tech Stack**
34
+ - **FastAPI** - Modern async Python web framework
35
+ - **DSPy** - AI agent orchestration and LLM integration
36
+ - **SQLAlchemy** - Database ORM with PostgreSQL/SQLite support
37
+ - **Plotly** - Interactive data visualizations
38
+ - **Pandas/NumPy** - Data manipulation and analysis
39
+ - **Scikit-learn** - Machine learning models
40
+ - **Statsmodels** - Statistical analysis
41
+
42
+ ### **Core Features**
43
+ - **Multi-Agent System** - 4+ specialized AI agents for different analysis tasks
44
+ - **Template System** - User-customizable agent configurations
45
+ - **Deep Analysis** - Multi-step analytical workflows with streaming progress
46
+ - **Session Management** - Stateful user sessions with shared data context
47
+ - **Code Execution** - Safe Python code execution environment
48
+ - **Real-time Streaming** - WebSocket support for live analysis updates
49
+
50
+ ### **Agent Types**
51
+ 1. **Data Preprocessing Agent** - Data cleaning and preparation
52
+ 2. **Statistical Analytics Agent** - Statistical analysis using statsmodels
53
+ 3. **Machine Learning Agent** - ML modeling with scikit-learn
54
+ 4. **Data Visualization Agent** - Interactive charts with Plotly
55
+ 5. **Feature Engineering Agent** (Premium) - Advanced feature creation
56
+ 6. **Polars Agent** (Premium) - High-performance data processing
57
+
58
+ ## 🚀 Quick Start Guide
59
+
60
+ ### **1. Environment Setup**
61
+
62
+ ```bash
63
+ # Navigate to backend directory
64
+ cd Auto-Analyst-CS/auto-analyst-backend
65
+
66
+ # Create virtual environment
67
+ python -m venv venv
68
+ source venv/bin/activate # Linux/Mac
69
+ venv\Scripts\activate # Windows
70
+
71
+ # Install dependencies
72
+ pip install -r requirements.txt
73
+ ```
74
+
75
+ ### **2. Environment Configuration**
76
+
77
+ Create `.env` file with required variables:
78
+
79
+ ```env
80
+ # Database Configuration
81
+ DATABASE_URL=sqlite:///./chat_database.db
82
+
83
+ # AI Model Configuration
84
+ OPENAI_API_KEY=your-openai-api-key
85
+ MODEL_PROVIDER=openai # openai, anthropic, groq, gemini
86
+ MODEL_NAME=gpt-4o-mini
87
+ TEMPERATURE=0.7
88
+ MAX_TOKENS=6000
89
+
90
+ # Optional: Additional AI Providers
91
+ ANTHROPIC_API_KEY=your-anthropic-key
92
+ GROQ_API_KEY=your-groq-key
93
+ GEMINI_API_KEY=your-gemini-key
94
+
95
+ # Security
96
+ ADMIN_API_KEY=your-admin-key
97
+
98
+ # Application Settings
99
+ ENVIRONMENT=development
100
+ FRONTEND_URL=http://localhost:3000/
101
+ ```
102
+
103
+ ### **3. Database Initialization**
104
+
105
+ ```bash
106
+ # Initialize database and default agents
107
+ python -c "
108
+ from src.db.init_db import init_db
109
+ init_db()
110
+ print('✅ Database and agents initialized successfully')
111
+ "
112
+ ```
113
+
114
+ ### **4. Start Development Server**
115
+
116
+ ```bash
117
+ # Start the FastAPI server
118
+ python -m app
119
+
120
+ # Or with uvicorn for more control
121
+ uvicorn app:app --reload --host 0.0.0.0 --port 8000
122
+ ```
123
+
124
+ ### **5. Verify Installation**
125
+
126
+ - **API Documentation**: `http://localhost:8000/docs`
127
+ - **Health Check**: `http://localhost:8000/health`
128
+
129
+ ## 🔧 Development Workflow
130
+
131
+ ### **Adding New Agents**
132
+
133
+ 1. **Define Agent Signature** in `src/agents/agents.py` (see the sketch below this list)
134
+ 2. **Add Configuration** to `agents_config.json`
135
+ 3. **Register Agent** in loading system
136
+ 4. **Test Integration** with multi-agent pipeline
137
+
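+ A minimal signature sketch (a hypothetical agent; the `goal`/`dataset`/`code` field names mirror the agent-testing example later in this guide, not necessarily the exact schema in `src/agents/agents.py`):
+
+ ```python
+ import dspy
+
+ class outlier_detection_agent(dspy.Signature):
+     """Flags outliers in the dataset and emits cleaning code."""
+     goal = dspy.InputField(desc="user-defined analysis goal")
+     dataset = dspy.InputField(desc="description of columns and dtypes")
+     code = dspy.OutputField(desc="Python code that flags outliers")
+ ```
+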
138
+ ### **Adding New API Endpoints**
139
+
140
+ 1. **Create Route File** in `src/routes/` (see the sketch below this list)
141
+ 2. **Define Pydantic Models** for request/response
142
+ 3. **Implement Endpoints** with proper error handling
143
+ 4. **Register Router** in `app.py`
144
+ 5. **Update Documentation**
145
+
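+ A minimal sketch of steps 1-4 (the router, model, and endpoint names are illustrative, not existing project files):
+
+ ```python
+ from fastapi import APIRouter
+ from pydantic import BaseModel
+
+ example_router = APIRouter(prefix="/example", tags=["example"])
+
+ class EchoRequest(BaseModel):
+     text: str
+
+ @example_router.post("/echo")
+ async def echo(req: EchoRequest):
+     # Pydantic has already validated the payload at this point
+     return {"echo": req.text}
+
+ # Step 4: register the router in app.py
+ # app.include_router(example_router)
+ ```
+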
146
+ ### **Database Changes**
147
+
148
+ 1. **Modify Models** in `src/db/schemas/models.py`
149
+ 2. **Create Migration**: `alembic revision --autogenerate -m "description"`
150
+ 3. **Apply Migration**: `alembic upgrade head`
151
+ 4. **Update Documentation**
152
+
153
+ ## 📊 System Architecture
154
+
155
+ ### **Request Processing Flow**
156
+ ```
157
+ HTTP Request → FastAPI Router → Route Handler → Business Logic →
158
+ Database/Agent System → AI Model → Response Processing → JSON Response
159
+ ```
160
+
161
+ ### **Agent Execution Flow**
162
+ ```
163
+ User Query → Session Manager → Agent Selection → Context Preparation →
164
+ DSPy Chain → AI Model → Code Generation → Execution → Response Formatting
165
+ ```
166
+
167
+ ### **Deep Analysis Workflow**
168
+ ```
169
+ Goal Input → Question Generation → Planning → Multi-Agent Execution →
170
+ Code Synthesis → Result Compilation → HTML Report Generation
171
+ ```
172
+
173
+ ## 🧪 Testing & Validation
174
+
175
+ ### **API Testing**
176
+ ```bash
177
+ # Interactive documentation
178
+ open http://localhost:8000/docs
179
+
180
+ # cURL examples
181
+ curl -X GET "http://localhost:8000/health"
182
+ curl -X POST "http://localhost:8000/chat/preprocessing_agent" \
183
+ -H "Content-Type: application/json" \
184
+ -d '{"query": "Clean this dataset", "session_id": "test"}'
185
+ ```
186
+
187
+ ### **Agent Testing**
188
+ ```python
189
+ # Test individual agents
190
+ from src.agents.agents import preprocessing_agent
191
+ import dspy
192
+
193
+ # Configure DSPy
194
+ lm = dspy.LM('openai/gpt-4o-mini', api_key='your-key')
195
+ dspy.configure(lm=lm)
196
+
197
+ # Test agent
198
+ agent = dspy.ChainOfThought(preprocessing_agent)
199
+ result = agent(goal='clean data', dataset='test dataset')
200
+ print(result)
201
+ ```
202
+
203
+ ## 🔒 Security & Production
204
+
205
+ ### **Security Features**
206
+ - **Session-based authentication** with secure session management
207
+ - **API key protection** for admin endpoints
208
+ - **Input validation** using Pydantic models
209
+ - **Error handling** with proper HTTP status codes
210
+ - **CORS configuration** for frontend integration
211
+
212
+ ### **Production Considerations**
213
+ - **PostgreSQL database** for production deployment
214
+ - **Environment variable management** for secrets
215
+ - **Logging configuration** for monitoring
216
+ - **Rate limiting** for API protection
217
+ - **Performance optimization** for large datasets
218
+
219
+ ## 📈 Monitoring & Analytics
220
+
221
+ The backend includes comprehensive analytics for:
222
+ - **Usage tracking** - API endpoint usage and performance
223
+ - **Model usage** - AI model consumption and costs
224
+ - **User analytics** - User behavior and engagement
225
+ - **Error monitoring** - System health and error tracking
226
+ - **Performance metrics** - Response times and throughput
227
+
228
+ ## 🤝 Contributing
229
+
230
+ 1. **Follow coding standards** defined in development workflow
231
+ 2. **Add comprehensive tests** for new features
232
+ 3. **Update documentation** for all changes
233
+ 4. **Use proper error handling** patterns
234
+ 5. **Submit detailed pull requests** with clear descriptions
235
+
236
+ ---
237
+
238
+ ## 📖 Detailed Documentation
239
+
240
+ For specific implementation details, refer to the organized documentation in each subdirectory:
241
+
242
+ - **[Getting Started Guide](./getting_started.md)** - Complete setup walkthrough
243
+ - **[Architecture Documentation](./architecture/)** - System design and components
244
+ - **[Development Guides](./development/)** - Workflow and best practices
245
+ - **[API Reference](./api/)** - Complete endpoint documentation
246
+ - **[System Documentation](./system/)** - Database and core systems
247
+ - **[Troubleshooting](./troubleshooting/)** - Debugging and solutions
248
+
249
+ ---
250
+
251
+ **Need help?** Check the troubleshooting guide or refer to the comprehensive documentation in each section.
docs/api/README.md ADDED
@@ -0,0 +1,23 @@
 
1
+ # Auto-Analyst Backend API Documentation
2
+
3
+ This document is a guide to the backend API endpoints used by the Auto-Analyst application: how each endpoint handles requests, transforms data, and structures its responses.
4
+
5
+ Each linked page below documents one functional area of the API, so developers can quickly find request formats, parameters, and response schemas.
6
+
7
+ For details on each functional area, refer to the sections below:
8
+
9
+ ## 📚 Core Documentation
10
+
11
+ - **[Getting Started Guide](../getting_started.md)**: Quick start guide for new developers and LLMs to understand the system architecture and get up to speed quickly
12
+ - **[System Architecture](../architecture/architecture.md)**: Comprehensive overview of the backend system design, components, and data flow patterns
13
+ - **[Troubleshooting Guide](../troubleshooting/troubleshooting.md)**: Common issues, debugging tools, and solutions for development and deployment problems
14
+
15
+ ## 🛠️ API Reference
16
+
17
+ - **[Core Endpoints](./routes/session.md)**: Review the core endpoints that handle fundamental operations within the application, including data uploads, AI analysis, model settings, and session management.
18
+ - **[Analytics Endpoints](./routes/analytics.md)**: Explore the endpoints dedicated to analytics, providing insights into usage statistics, performance metrics, cost analysis, and real-time monitoring.
19
+ - **[Chat Endpoints](./routes/chats.md)**: Discover the endpoints that manage chat interactions, enabling users to create, retrieve, and manage chat sessions effectively.
20
+ - **[Code Endpoints](./routes/code.md)**: Learn about the endpoints for code execution, editing, fixing, and cleaning operations with advanced AI assistance.
21
+ - **[Deep Analysis Endpoints](./routes/deep_analysis.md)**: Comprehensive documentation for the multi-agent deep analysis system, including streaming progress, report management, template integration, and how user's active agents are leveraged for advanced analytical insights.
22
+ - **[Feedback Endpoints](./routes/feedback.md)**: Understand the endpoints for managing user feedback on AI-generated messages, including rating systems and model performance tracking.
23
+ - **[Templates Endpoints](./routes/templates.md)**: Comprehensive guide to the template system, agent loading, user preferences, and how personalized AI agent configurations work for different users.
docs/api/routes/analytics.md ADDED
@@ -0,0 +1,562 @@
 
1
+ # Analytics Routes Documentation
2
+
3
+ These routes provide comprehensive analytics functionality for the Auto-Analyst backend, including dashboard summaries, user analytics, model performance metrics, cost analysis, and system monitoring.
4
+
5
+ ## Authentication
6
+
7
+ All analytics endpoints require admin authentication via API key:
8
+
9
+ ```python
10
+ ADMIN_API_KEY = os.getenv("ADMIN_API_KEY", "default-admin-key-change-me")
11
+ ```
12
+
13
+ The API key can be provided in either of two ways (see the example request below):
14
+ - **Header:** `X-Admin-API-Key`
15
+ - **Query parameter:** `admin_api_key`
16
+
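+ For example, a dashboard request using the header method (a sketch assuming a local deployment and the third-party `requests` library):
+
+ ```python
+ import requests
+
+ resp = requests.get(
+     "http://localhost:8000/analytics/dashboard",  # hypothetical base URL
+     headers={"X-Admin-API-Key": "your-admin-key"},
+     params={"period": "7d"},
+ )
+ resp.raise_for_status()
+ print(resp.json()["total_requests"])
+ ```
+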
17
+ ---
18
+
19
+ ## Dashboard Endpoints
20
+
21
+ ### **GET /analytics/dashboard**
22
+ Returns comprehensive dashboard data combining usage statistics, model performance, and user activity.
23
+
24
+ **Query Parameters:**
25
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
26
+
27
+ **Response:**
28
+ ```json
29
+ {
30
+ "total_tokens": 123456,
31
+ "total_cost": 25.50,
32
+ "total_requests": 1000,
33
+ "total_users": 50,
34
+ "daily_usage": [
35
+ {
36
+ "date": "2023-05-01",
37
+ "tokens": 5000,
38
+ "cost": 1.25,
39
+ "requests": 100
40
+ }
41
+ ],
42
+ "model_usage": [
43
+ {
44
+ "model_name": "claude-3-sonnet-20241022",
45
+ "tokens": 10000,
46
+ "cost": 10.00,
47
+ "requests": 200
48
+ }
49
+ ],
50
+ "top_users": [
51
+ {
52
+ "user_id": "123",
53
+ "tokens": 5000,
54
+ "cost": 5.00,
55
+ "requests": 50
56
+ }
57
+ ],
58
+ "start_date": "2023-04-01",
59
+ "end_date": "2023-05-01"
60
+ }
61
+ ```
62
+
63
+ ### **WebSocket /analytics/dashboard/realtime**
64
+ WebSocket endpoint for real-time dashboard updates. Accepts connections and maintains them for broadcasting live data updates.
65
+
66
+ ---
67
+
68
+ ## User Analytics Endpoints
69
+
70
+ ### **GET /analytics/users**
71
+ Returns user list with usage statistics from the past 7 days.
72
+
73
+ **Query Parameters:**
74
+ - `limit` (optional): Maximum users to return (default: `100`)
75
+ - `offset` (optional): Pagination offset (default: `0`)
76
+
77
+ **Response:**
78
+ ```json
79
+ {
80
+ "users": [
81
+ {
82
+ "user_id": "123",
83
+ "tokens": 5000,
84
+ "cost": 5.00,
85
+ "requests": 50,
86
+ "first_seen": "2023-04-01T12:00:00Z",
87
+ "last_seen": "2023-05-01T12:00:00Z"
88
+ }
89
+ ],
90
+ "total": 200,
91
+ "limit": 100,
92
+ "offset": 0
93
+ }
94
+ ```
95
+
96
+ ### **GET /analytics/users/activity**
97
+ Returns daily user activity metrics with new user tracking.
98
+
99
+ **Query Parameters:**
100
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
101
+
102
+ **Response:**
103
+ ```json
104
+ {
105
+ "user_activity": [
106
+ {
107
+ "date": "2023-05-01",
108
+ "activeUsers": 20,
109
+ "newUsers": 5,
110
+ "sessions": 30
111
+ }
112
+ ]
113
+ }
114
+ ```
115
+
116
+ ### **GET /analytics/users/sessions/stats**
117
+ Returns session statistics including total users, active users today, average queries per session, and average session time.
118
+
119
+ **Response:**
120
+ ```json
121
+ {
122
+ "totalUsers": 500,
123
+ "activeToday": 25,
124
+ "avgQueriesPerSession": 3.2,
125
+ "avgSessionTime": 300
126
+ }
127
+ ```
128
+
129
+ ### **WebSocket /analytics/realtime**
130
+ WebSocket endpoint for real-time user analytics updates.
131
+
132
+ ---
133
+
134
+ ## Model Analytics Endpoints
135
+
136
+ ### **GET /analytics/usage/models**
137
+ Returns model usage breakdown with performance metrics.
138
+
139
+ **Query Parameters:**
140
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
141
+
142
+ **Response:**
143
+ ```json
144
+ {
145
+ "model_usage": [
146
+ {
147
+ "model_name": "claude-3-sonnet-20241022",
148
+ "tokens": 10000,
149
+ "cost": 10.00,
150
+ "requests": 200,
151
+ "avg_response_time": 1.5
152
+ }
153
+ ]
154
+ }
155
+ ```
156
+
157
+ ### **GET /analytics/models/history**
158
+ Returns daily model usage history with trend data.
159
+
160
+ **Query Parameters:**
161
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
162
+
163
+ **Response:**
164
+ ```json
165
+ {
166
+ "model_history": [
167
+ {
168
+ "date": "2023-05-01",
169
+ "models": [
170
+ {
171
+ "name": "claude-3-sonnet-20241022",
172
+ "tokens": 5000,
173
+ "requests": 100
174
+ }
175
+ ]
176
+ }
177
+ ]
178
+ }
179
+ ```
180
+
181
+ ### **GET /analytics/models/metrics**
182
+ Returns model performance metrics including success rates and response times.
183
+
184
+ **Response:**
185
+ ```json
186
+ {
187
+ "model_metrics": [
188
+ {
189
+ "name": "claude-3-sonnet-20241022",
190
+ "avg_tokens": 250.5,
191
+ "avg_response_time": 1.2,
192
+ "success_rate": 0.95
193
+ }
194
+ ]
195
+ }
196
+ ```
197
+
198
+ ---
199
+
200
+ ## Cost Analytics Endpoints
201
+
202
+ ### **GET /analytics/costs/summary**
203
+ Returns cost summary with averages and totals.
204
+
205
+ **Query Parameters:**
206
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
207
+
208
+ **Response:**
209
+ ```json
210
+ {
211
+ "totalCost": 25.50,
212
+ "totalTokens": 100000,
213
+ "totalRequests": 1000,
214
+ "avgDailyCost": 0.85,
215
+ "costPerThousandTokens": 0.255,
216
+ "daysInPeriod": 30,
217
+ "startDate": "2023-04-01",
218
+ "endDate": "2023-05-01"
219
+ }
220
+ ```
221
+
222
+ ### **GET /analytics/costs/daily**
223
+ Returns daily cost breakdown with filled gaps for missing dates.
224
+
225
+ **Query Parameters:**
226
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
227
+
228
+ **Response:**
229
+ ```json
230
+ {
231
+ "daily_costs": [
232
+ {
233
+ "date": "2023-05-01",
234
+ "cost": 1.25,
235
+ "tokens": 5000
236
+ }
237
+ ]
238
+ }
239
+ ```
240
+
241
+ ### **GET /analytics/costs/models**
242
+ Returns cost breakdown by model.
243
+
244
+ **Query Parameters:**
245
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
246
+
247
+ **Response:**
248
+ ```json
249
+ {
250
+ "model_costs": [
251
+ {
252
+ "model_name": "claude-3-sonnet-20241022",
253
+ "cost": 15.50,
254
+ "tokens": 50000,
255
+ "requests": 500
256
+ }
257
+ ]
258
+ }
259
+ ```
260
+
261
+ ### **GET /analytics/costs/projections**
262
+ Returns cost projections based on last 30 days usage.
263
+
264
+ **Response:**
265
+ ```json
266
+ {
267
+ "nextMonth": 75.00,
268
+ "next3Months": 225.00,
269
+ "nextYear": 900.00,
270
+ "tokensNextMonth": 300000,
271
+ "dailyCost": 2.50,
272
+ "dailyTokens": 10000,
273
+ "baselineDays": 30
274
+ }
275
+ ```
276
+
277
+ ### **GET /analytics/costs/today**
278
+ Returns today's cost data.
279
+
280
+ **Response:**
281
+ ```json
282
+ {
283
+ "date": "2023-05-01",
284
+ "cost": 2.50,
285
+ "tokens": 10000,
286
+ "requests": 100
287
+ }
288
+ ```
289
+
290
+ ---
291
+
292
+ ## Tier Analytics Endpoints
293
+
294
+ ### **GET /analytics/tiers/usage**
295
+ Returns usage data categorized by model tiers with aggregated statistics.
296
+
297
+ **Query Parameters:**
298
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
299
+
300
+ **Response:**
301
+ ```json
302
+ {
303
+ "tier_data": {
304
+ "tier_1": {
305
+ "name": "Basic",
306
+ "credits": 1,
307
+ "total_tokens": 50000,
308
+ "total_requests": 500,
309
+ "total_cost": 5.00,
310
+ "avg_tokens_per_query": 100,
311
+ "cost_per_1k_tokens": 0.10,
312
+ "total_credit_cost": 500,
313
+ "cost_per_credit": 0.01,
314
+ "models": [...]
315
+ }
316
+ },
317
+ "period": "30d",
318
+ "start_date": "2023-04-01",
319
+ "end_date": "2023-05-01"
320
+ }
321
+ ```
322
+
323
+ ### **GET /analytics/tiers/projections**
324
+ Returns tier-based cost and usage projections.
325
+
326
+ **Response:**
327
+ ```json
328
+ {
329
+ "daily_usage": {...},
330
+ "projections": {
331
+ "monthly": {...},
332
+ "quarterly": {...},
333
+ "yearly": {...}
334
+ },
335
+ "tier_definitions": {...}
336
+ }
337
+ ```
338
+
339
+ ### **GET /analytics/tiers/efficiency**
340
+ Returns efficiency metrics by tier including cost per credit and tokens per credit.
341
+
342
+ **Query Parameters:**
343
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
344
+
345
+ **Response:**
346
+ ```json
347
+ {
348
+ "efficiency_data": {...},
349
+ "most_efficient_tier": "tier_2",
350
+ "best_value_tier": "tier_1",
351
+ "period": "30d",
352
+ "start_date": "2023-04-01",
353
+ "end_date": "2023-05-01"
354
+ }
355
+ ```
356
+
357
+ ---
358
+
359
+ ## Code Execution Analytics Endpoints
360
+
361
+ ### **GET /analytics/code-executions/summary**
362
+ Returns code execution statistics including success rates and model performance.
363
+
364
+ **Query Parameters:**
365
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
366
+
367
+ **Response:**
368
+ ```json
369
+ {
370
+ "period": "30d",
371
+ "start_date": "2023-04-01",
372
+ "end_date": "2023-05-01",
373
+ "overall_stats": {
374
+ "total_executions": 1000,
375
+ "successful_executions": 950,
376
+ "failed_executions": 50,
377
+ "success_rate": 0.95,
378
+ "total_users": 100,
379
+ "total_chats": 200
380
+ },
381
+ "model_performance": [...],
382
+ "failed_agents": [...]
383
+ }
384
+ ```
385
+
386
+ ### **GET /analytics/code-executions/detailed**
387
+ Returns detailed code execution records with filtering options.
388
+
389
+ **Query Parameters:**
390
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
391
+ - `success_filter` (optional): Filter by success status (boolean)
392
+ - `user_id` (optional): Filter by user ID
393
+ - `model_name` (optional): Filter by model name
394
+ - `limit` (optional): Maximum results (default: `100`)
395
+
396
+ **Response:**
397
+ ```json
398
+ {
399
+ "period": "30d",
400
+ "start_date": "2023-04-01",
401
+ "end_date": "2023-05-01",
402
+ "count": 50,
403
+ "executions": [...]
404
+ }
405
+ ```
406
+
407
+ ### **GET /analytics/code-executions/users**
408
+ Returns code execution statistics grouped by user.
409
+
410
+ **Query Parameters:**
411
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
412
+ - `limit` (optional): Maximum users (default: `50`)
413
+
414
+ **Response:**
415
+ ```json
416
+ {
417
+ "period": "30d",
418
+ "start_date": "2023-04-01",
419
+ "end_date": "2023-05-01",
420
+ "users": [...]
421
+ }
422
+ ```
423
+
424
+ ### **GET /analytics/code-executions/error-analysis**
425
+ Returns error analysis with categorized error types and agent failure patterns.
426
+
427
+ **Query Parameters:**
428
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
429
+
430
+ **Response:**
431
+ ```json
432
+ {
433
+ "period": "30d",
434
+ "start_date": "2023-04-01",
435
+ "end_date": "2023-05-01",
436
+ "total_failed_executions": 50,
437
+ "error_types": [...],
438
+ "error_by_agent": [...]
439
+ }
440
+ ```
441
+
442
+ ---
443
+
444
+ ## Feedback Analytics Endpoints
445
+
446
+ ### **GET /analytics/feedback/summary**
447
+ Returns feedback summary statistics including rating distributions and trends.
448
+
449
+ **Query Parameters:**
450
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
451
+
452
+ **Response:**
453
+ ```json
454
+ {
455
+ "period": "30d",
456
+ "start_date": "2023-04-01",
457
+ "end_date": "2023-05-01",
458
+ "total_feedback": 500,
459
+ "avg_rating": 4.2,
460
+ "chats_with_feedback": 200,
461
+ "ratings_distribution": [
462
+ {"rating": 1, "count": 10},
463
+ {"rating": 2, "count": 20},
464
+ {"rating": 3, "count": 50},
465
+ {"rating": 4, "count": 200},
466
+ {"rating": 5, "count": 220}
467
+ ],
468
+ "models_data": [...],
469
+ "feedback_trend": [...]
470
+ }
471
+ ```
472
+
473
+ ### **GET /analytics/feedback/detailed**
474
+ Returns detailed feedback records with filtering and pagination.
475
+
476
+ **Query Parameters:**
477
+ - `period` (optional): Time period (`7d`, `30d`, `90d`, default: `30d`)
478
+ - `min_rating` (optional): Minimum rating filter
479
+ - `max_rating` (optional): Maximum rating filter
480
+ - `model_name` (optional): Filter by model name
481
+ - `limit` (optional): Maximum results (default: `100`)
482
+ - `offset` (optional): Pagination offset (default: `0`)
483
+
484
+ **Response:**
485
+ ```json
486
+ {
487
+ "period": "30d",
488
+ "start_date": "2023-04-01",
489
+ "end_date": "2023-05-01",
490
+ "total": 500,
491
+ "count": 100,
492
+ "offset": 0,
493
+ "limit": 100,
494
+ "feedback": [...]
495
+ }
496
+ ```
497
+
498
+ ---
499
+
500
+ ## Public Endpoints
501
+
502
+ ### **GET /analytics/public/ticker**
503
+ Returns public ticker data for landing page statistics. **No authentication required.**
504
+
505
+ **Response:**
506
+ ```json
507
+ {
508
+ "total_signups": 1000,
509
+ "total_tokens": 5000000,
510
+ "total_requests": 50000,
511
+ "last_updated": "2023-05-01T12:00:00Z"
512
+ }
513
+ ```
514
+
515
+ ---
516
+
517
+ ## Utility Endpoints
518
+
519
+ ### **GET /analytics/usage/summary**
520
+ Returns overall usage summary (legacy endpoint, calls dashboard with 30d period).
521
+
522
+ ### **GET /analytics/debug/model_usage**
523
+ Debug endpoint for testing admin API key validation.
524
+
525
+ **Response:**
526
+ ```json
527
+ {
528
+ "status": "success",
529
+ "message": "Admin API key validated successfully"
530
+ }
531
+ ```
532
+
533
+ ---
534
+
535
+ ## Error Categorization
536
+
537
+ The system automatically categorizes code execution errors into the following types:
538
+
539
+ - **NameError**: Variable or function name not found
540
+ - **SyntaxError**: Invalid Python syntax
541
+ - **TypeError**: Type-related errors
542
+ - **AttributeError**: Attribute access errors
543
+ - **IndexError/KeyError**: Index or key access errors
544
+ - **ImportError**: Module import errors
545
+ - **ValueError**: Invalid values passed to functions
546
+ - **OperationError**: Unsupported operations
547
+ - **IndentationError**: Python indentation errors
548
+ - **PermissionError**: File/system permission errors
549
+ - **FileNotFoundError**: File access errors
550
+ - **MemoryError**: Memory allocation errors
551
+ - **TimeoutError**: Operation timeout errors
552
+ - **OtherError**: Uncategorized errors
553
+
554
+ ## Real-time Updates
555
+
556
+ The analytics system supports real-time updates through WebSocket connections:
557
+
558
+ - **Dashboard updates**: Broadcasted when new model usage is recorded
559
+ - **User activity updates**: Broadcasted for user activity changes
560
+ - **Model performance updates**: Broadcasted for model-specific metrics
561
+
562
+ All real-time updates are sent as JSON messages with `type` field indicating the update category and `metrics` containing the delta or new values.
docs/api/routes/chats.md ADDED
@@ -0,0 +1,181 @@
 
1
+ ### Chat Routes Overview
2
+
3
+ These routes handle chat interactions, message processing, user management, and debugging.
4
+
5
+ ---
6
+
7
+ ### **Chat Management**
8
+
9
+ #### **1. Create a New Chat**
10
+ **Endpoint:** `POST /chats/`
11
+ **Description:** Creates a new chat session.
12
+ **Request Body:**
13
+ ```json
14
+ {
15
+ "user_id": 123
16
+ }
17
+ ```
18
+ **Response:**
19
+ ```json
20
+ {
21
+ "chat_id": 456,
22
+ "user_id": 123,
23
+ "title": "New Chat",
24
+ "created_at": "2023-05-01T12:00:00Z"
25
+ }
26
+ ```
27
+
28
+ ---
29
+
30
+ #### **2. Retrieve a Chat by ID**
31
+ **Endpoint:** `GET /chats/{chat_id}`
32
+ **Description:** Fetches a specific chat along with its messages.
33
+ **Path Parameter:** `chat_id` (ID of the chat)
34
+ **Query Parameter:** `user_id` (Optional for access control)
35
+ **Response:**
36
+ ```json
37
+ {
38
+ "chat_id": 456,
39
+ "title": "New Chat",
40
+ "created_at": "2023-05-01T12:00:00Z",
41
+ "user_id": 123,
42
+ "messages": [
43
+ {
44
+ "message_id": 789,
45
+ "chat_id": 456,
46
+ "content": "Hello, how can I help?",
47
+ "sender": "ai",
48
+ "timestamp": "2023-05-01T12:01:00Z"
49
+ }
50
+ ]
51
+ }
52
+ ```
53
+
54
+ ---
55
+
56
+ #### **3. List Recent Chats**
57
+ **Endpoint:** `GET /chats/`
58
+ **Description:** Retrieves a list of recent chats, optionally filtered by user ID.
59
+ **Query Parameters:**
60
+ - `user_id` (Optional for filtering by user)
61
+ - `limit` (Maximum number of chats, default: 10, max: 100)
62
+ - `offset` (For pagination, default: 0)
63
+ **Response:**
64
+ ```json
65
+ [
66
+ {
67
+ "chat_id": 456,
68
+ "user_id": 123,
69
+ "title": "New Chat",
70
+ "created_at": "2023-05-01T12:00:00Z"
71
+ }
72
+ ]
73
+ ```
74
+
75
+ ---
76
+
77
+ #### **4. Update a Chat**
78
+ **Endpoint:** `PUT /chats/{chat_id}`
79
+ **Description:** Updates a chat's title or user ID.
80
+ **Path Parameter:** `chat_id` (ID of the chat to update)
81
+ **Request Body:**
82
+ ```json
83
+ {
84
+ "title": "Updated Chat Title",
85
+ "user_id": 123
86
+ }
87
+ ```
88
+ **Response:**
89
+ ```json
90
+ {
91
+ "chat_id": 456,
92
+ "title": "Updated Chat Title",
93
+ "created_at": "2023-05-01T12:00:00Z",
94
+ "user_id": 123
95
+ }
96
+ ```
97
+
98
+ ---
99
+
100
+ #### **5. Delete a Chat**
101
+ **Endpoint:** `DELETE /chats/{chat_id}`
102
+ **Description:** Deletes a chat and all its messages while preserving model usage records.
103
+ **Path Parameter:** `chat_id` (ID of the chat to delete)
104
+ **Query Parameter:** `user_id` (Optional for access control)
105
+ **Response:**
106
+ ```json
107
+ {
108
+ "message": "Chat 456 deleted successfully",
109
+ "preserved_model_usage": true
110
+ }
111
+ ```
112
+
113
+ ---
114
+
115
+ #### **6. Cleanup Empty Chats**
116
+ **Endpoint:** `POST /chats/cleanup-empty`
117
+ **Description:** Deletes empty chats for a user.
118
+ **Request Body:**
119
+ ```json
120
+ {
121
+ "user_id": 123,
122
+ "is_admin": false
123
+ }
124
+ ```
125
+ **Response:**
126
+ ```json
127
+ {
128
+ "message": "Deleted 5 empty chats"
129
+ }
130
+ ```
131
+
132
+ ---
133
+
134
+ ### **Message Management**
135
+
136
+ #### **1. Add Message to Chat**
137
+ **Endpoint:** `POST /chats/{chat_id}/messages`
138
+ **Description:** Adds a message to an existing chat.
139
+ **Path Parameter:** `chat_id` (ID of the chat)
140
+ **Query Parameter:** `user_id` (Optional for access control)
141
+ **Request Body:**
142
+ ```json
143
+ {
144
+ "content": "Hello, I need help with data analysis",
145
+ "sender": "user"
146
+ }
147
+ ```
148
+ **Response:**
149
+ ```json
150
+ {
151
+ "message_id": 789,
152
+ "chat_id": 456,
153
+ "content": "Hello, I need help with data analysis",
154
+ "sender": "user",
155
+ "timestamp": "2023-05-01T12:01:00Z"
156
+ }
157
+ ```
158
+
159
+ ---
160
+
161
+ ### **User Management**
162
+
163
+ #### **1. Create or Retrieve a User**
164
+ **Endpoint:** `POST /chats/users`
165
+ **Description:** Creates a new user or retrieves an existing one based on email.
166
+ **Request Body:**
167
+ ```json
168
+ {
169
+ "username": "john_doe",
170
+ "email": "john@example.com"
171
+ }
172
+ ```
173
+ **Response:**
174
+ ```json
175
+ {
176
+ "user_id": 123,
177
+ "username": "john_doe",
178
+ "email": "john@example.com",
179
+ "created_at": "2023-05-01T12:00:00Z"
180
+ }
181
+ ```
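+
+ Putting these endpoints together, a typical client flow is: create or fetch the user, open a chat, then append messages. A hedged sketch with `requests` follows; the base URL is an assumption:
+
+ ```python
+ # End-to-end sketch of the chat flow; http://localhost:8000 is assumed.
+ import requests
+
+ BASE = "http://localhost:8000"
+
+ user = requests.post(f"{BASE}/chats/users",
+                      json={"username": "john_doe", "email": "john@example.com"}).json()
+ chat = requests.post(f"{BASE}/chats/", json={"user_id": user["user_id"]}).json()
+ message = requests.post(f"{BASE}/chats/{chat['chat_id']}/messages",
+                         params={"user_id": user["user_id"]},
+                         json={"content": "Hello, I need help with data analysis",
+                               "sender": "user"}).json()
+ print(message["message_id"])
+ ```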
docs/api/routes/code.md ADDED
@@ -0,0 +1,182 @@
1
+ # Code Routes Documentation
2
+
3
+ This document describes the API endpoints available for code execution, editing, fixing, and cleaning operations in the Auto-Analyst backend.
4
+
5
+ ## Base URL
6
+
7
+ All code-related endpoints are prefixed with `/code`.
8
+
9
+ ## Endpoints
10
+
11
+ ### Execute Code
12
+ Executes Python code against the current session's dataframe.
13
+
14
+ **Endpoint:** `POST /code/execute`
15
+
16
+ **Request Body:**
17
+ ```json
18
+ {
19
+ "code": "string", // Python code to execute
20
+ "session_id": "string", // Optional session ID
21
+ "message_id": 123 // Optional message ID for tracking
22
+ }
23
+ ```
24
+
25
+ **Response:**
26
+ ```json
27
+ {
28
+ "output": "string", // Execution output
29
+ "plotly_outputs": [ // Optional array of plotly outputs
30
+ "string"
31
+ ]
32
+ }
33
+ ```
34
+
35
+ **Error Responses:**
36
+ - `400 Bad Request`: No dataset loaded or no code provided
37
+ - `500 Internal Server Error`: Execution error
38
+
39
+ ### Edit Code
40
+ Uses AI to edit code based on user instructions.
41
+
42
+ **Endpoint:** `POST /code/edit`
43
+
44
+ **Request Body:**
45
+ ```json
46
+ {
47
+ "original_code": "string", // Code to be edited
48
+ "user_prompt": "string" // Instructions for editing
49
+ }
50
+ ```
51
+
52
+ **Response:**
53
+ ```json
54
+ {
55
+ "edited_code": "string" // The edited code
56
+ }
57
+ ```
58
+
59
+ **Error Responses:**
60
+ - `400 Bad Request`: Missing original code or editing instructions
61
+ - `500 Internal Server Error`: Editing error
62
+
63
+ ### Fix Code
64
+ Uses AI to fix code with errors, employing a block-by-block approach with DSPy refinement.
65
+
66
+ **Endpoint:** `POST /code/fix`
67
+
68
+ **Request Body:**
69
+ ```json
70
+ {
71
+ "code": "string", // Code containing errors
72
+ "error": "string" // Error message to fix
73
+ }
74
+ ```
75
+
76
+ **Response:**
77
+ ```json
78
+ {
79
+ "fixed_code": "string" // The fixed code
80
+ }
81
+ ```
82
+
83
+ **Error Responses:**
84
+ - `400 Bad Request`: Missing code or error message
85
+ - `500 Internal Server Error`: Fixing error
86
+
87
+ ### Clean Code
88
+ Cleans and formats code by organizing imports and ensuring proper code block formatting.
89
+
90
+ **Endpoint:** `POST /code/clean-code`
91
+
92
+ **Request Body:**
93
+ ```json
94
+ {
95
+ "code": "string" // Code to clean
96
+ }
97
+ ```
98
+
99
+ **Response:**
100
+ ```json
101
+ {
102
+ "cleaned_code": "string" // The cleaned code
103
+ }
104
+ ```
105
+
106
+ **Error Responses:**
107
+ - `400 Bad Request`: No code provided
108
+ - `500 Internal Server Error`: Cleaning error
109
+
110
+ ### Get Latest Code
111
+ Retrieves the latest code from a specific message.
112
+
113
+ **Endpoint:** `POST /code/get-latest-code`
114
+
115
+ **Request Body:**
116
+ ```json
117
+ {
118
+ "message_id": 123 // Message ID to retrieve code from
119
+ }
120
+ ```
121
+
122
+ **Response:**
123
+ ```json
124
+ {
125
+ "code": "string" // The retrieved code
126
+ }
127
+ ```
128
+
129
+ **Error Responses:**
130
+ - `400 Bad Request`: Missing message ID
131
+ - `404 Not Found`: Message not found
132
+ - `500 Internal Server Error`: Retrieval error
133
+
134
+ ## Code Processing Features
135
+
136
+ ### Import Organization
137
+ The code processing system automatically:
138
+ - Moves all import statements to the top of the file
139
+ - Deduplicates imports
140
+ - Sorts imports alphabetically
141
+
142
+ ### Code Block Management
143
+ The system supports code blocks marked with special comments:
144
+ - Start marker: `# agent_name code start`
145
+ - End marker: `# agent_name code end`
146
+
147
+ ### Error Handling with DSPy Refinement
148
+ When fixing code, the system uses DSPy's refinement mechanism:
149
+ - Identifies specific code blocks with errors
150
+ - Processes error messages to extract relevant information
151
+ - Uses a scoring function to validate fixes
152
+ - Employs iterative refinement with up to 3 attempts
153
+ - Fixes each block individually while maintaining the overall structure
154
+ - Preserves code block markers and relationships
155
+
156
+ ### Dataset Context
157
+ When editing or fixing code, the system provides context about the current dataset including:
158
+ - Number of rows and columns
159
+ - Column names and data types
160
+ - Null value counts
161
+ - Sample values for each column
162
+
163
+ ### Code Execution Safety
164
+ The execution system includes safety measures:
165
+ - Removes blocking calls like `plt.show()`
166
+ - Handles `__main__` block extraction
167
+ - Cleans up print statements with unwanted newlines
168
+ - Executes code in isolated namespaces
169
+
170
+ ## Session Management
171
+ All endpoints require a valid session ID, which is used to:
172
+ - Access the current dataset
173
+ - Maintain state between requests
174
+ - Track code execution history
175
+ - Store execution results for analysis
176
+
177
+ ## Error Handling
178
+ The system provides detailed error messages while maintaining security by:
179
+ - Logging errors for debugging
180
+ - Returning user-friendly error messages
181
+ - Preserving original code in case of processing failures
182
+ - Using code scoring to validate fixes before returning results
docs/api/routes/deep_analysis.md ADDED
@@ -0,0 +1,348 @@
1
+ # Deep Analysis API Documentation
2
+
3
+ ## Overview
4
+
5
+ The Deep Analysis system provides advanced multi-agent analytical capabilities that automatically generate comprehensive reports based on user goals. The system uses DSPy (Declarative Self-improving Language Programs) to orchestrate multiple AI agents and create detailed analytical insights.
6
+
7
+ ## Key Features
8
+
9
+ - **Multi-Agent Analysis**: Orchestrates multiple specialized agents (preprocessing, statistical analysis, machine learning, visualization)
10
+ - **Template Integration**: Uses the user's active templates/agents for analysis
11
+ - **Streaming Progress**: Real-time progress updates during analysis execution
12
+ - **Report Persistence**: Stores complete analysis reports in database with metadata
13
+ - **HTML Export**: Generates downloadable HTML reports with visualizations
14
+ - **Credit Tracking**: Monitors token usage, costs, and credits consumed
15
+
16
+ ## Template Integration
17
+
18
+ The deep analysis system integrates with the user's active templates through the agent system:
19
+
20
+ 1. **Agent Selection**: Uses agents from the user's active template preferences (configured via `/templates` endpoints)
21
+ 2. **Default Agents**: Falls back to system default agents if user hasn't configured preferences:
22
+ - `preprocessing` (both individual and planner variants)
23
+ - `statistical_analytics` (both individual and planner variants)
24
+ - `sk_learn` (both individual and planner variants)
25
+ - `data_viz` (both individual and planner variants)
26
+ 3. **Template Limits**: Respects the 10-template limit for planner performance optimization
27
+ 4. **Dynamic Planning**: The planner automatically selects the most appropriate agents based on the analysis goal and available templates
28
+
29
+ ## Analysis Flow
30
+
31
+ The deep analysis process follows these steps (a progress-lookup sketch follows the list):
32
+
33
+ 1. **Question Generation** (20% progress): Generates 5 targeted analytical questions based on the user's goal
34
+ 2. **Planning** (40% progress): Creates an optimized execution plan using available agents
35
+ 3. **Agent Execution** (60% progress): Executes analysis using user's active templates
36
+ 4. **Code Synthesis** (80% progress): Combines and optimizes code from all agents
37
+ 5. **Code Execution** (85% progress): Runs the synthesized analysis code
38
+ 6. **Synthesis** (90% progress): Synthesizes results into coherent insights
39
+ 7. **Conclusion** (100% progress): Generates final conclusions and recommendations
40
+
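+ Expressed as a lookup table (the keys for the later stages are assumptions; only `questions`, `planning`, and `agent_execution` are confirmed by the streaming example later in this document):
+
+ ```python
+ # Step-to-progress mapping implied by the list above; later keys are assumed.
+ STEP_PROGRESS = {
+     "questions": 20,
+     "planning": 40,
+     "agent_execution": 60,
+     "code_synthesis": 80,
+     "code_execution": 85,
+     "synthesis": 90,
+     "conclusion": 100,
+ }
+ ```
+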
41
+ ---
42
+
43
+ ## API Endpoints
44
+
45
+ ### Create Deep Analysis Report
46
+
47
+ **POST** `/deep_analysis/reports`
48
+
49
+ Creates a new deep analysis report in the database.
50
+
51
+ **Request Body:**
52
+ ```json
53
+ {
54
+ "report_uuid": "string",
55
+ "user_id": 123,
56
+ "goal": "Analyze customer churn patterns",
57
+ "status": "completed",
58
+ "deep_questions": "1. What factors...\n2. How does...",
59
+ "deep_plan": "{\n \"@preprocessing\": {\n \"create\": [...],\n \"use\": [...],\n \"instruction\": \"...\"\n }\n}",
60
+ "summaries": ["Agent summary 1", "Agent summary 2"],
61
+ "analysis_code": "import pandas as pd\n# Analysis code...",
62
+ "plotly_figures": [{"data": [...], "layout": {...}}],
63
+ "synthesis": ["Synthesis result 1"],
64
+ "final_conclusion": "## Conclusion\nThe analysis reveals...",
65
+ "html_report": "<html>...</html>",
66
+ "report_summary": "Brief summary of findings",
67
+ "progress_percentage": 100,
68
+ "duration_seconds": 120,
69
+ "credits_consumed": 5,
70
+ "error_message": null,
71
+ "model_provider": "anthropic",
72
+ "model_name": "claude-sonnet-4-20250514",
73
+ "total_tokens_used": 15000,
74
+ "estimated_cost": 0.25,
75
+ "steps_completed": ["questions", "planning", "execution", "synthesis", "conclusion"]
76
+ }
77
+ ```
78
+
79
+ **Response:**
80
+ ```json
81
+ {
82
+ "report_id": 1,
83
+ "report_uuid": "uuid-string",
84
+ "user_id": 123,
85
+ "goal": "Analyze customer churn patterns",
86
+ "status": "completed",
87
+ "start_time": "2024-01-01T12:00:00Z",
88
+ "end_time": "2024-01-01T12:02:00Z",
89
+ "duration_seconds": 120,
90
+ "report_summary": "Brief summary of findings",
91
+ "created_at": "2024-01-01T12:02:00Z",
92
+ "updated_at": "2024-01-01T12:02:00Z"
93
+ }
94
+ ```
95
+
96
+ ### Get Deep Analysis Reports
97
+
98
+ **GET** `/deep_analysis/reports`
99
+
100
+ Retrieves a list of deep analysis reports with optional filtering.
101
+
102
+ **Query Parameters:**
103
+ - `user_id` (optional): Filter by user ID
104
+ - `limit` (optional): Number of reports to return (1-100, default: 10)
105
+ - `offset` (optional): Number of reports to skip (default: 0)
106
+ - `status` (optional): Filter by status ("pending", "running", "completed", "failed")
107
+
108
+ **Response:**
109
+ ```json
110
+ [
111
+ {
112
+ "report_id": 1,
113
+ "report_uuid": "uuid-string",
114
+ "user_id": 123,
115
+ "goal": "Analyze customer churn patterns",
116
+ "status": "completed",
117
+ "start_time": "2024-01-01T12:00:00Z",
118
+ "end_time": "2024-01-01T12:02:00Z",
119
+ "duration_seconds": 120,
120
+ "report_summary": "Brief summary of findings",
121
+ "created_at": "2024-01-01T12:02:00Z",
122
+ "updated_at": "2024-01-01T12:02:00Z"
123
+ }
124
+ ]
125
+ ```
126
+
127
+ ### Get User Historical Reports
128
+
129
+ **GET** `/deep_analysis/reports/user_historical`
130
+
131
+ Retrieves all historical deep analysis reports for a specific user.
132
+
133
+ **Query Parameters:**
134
+ - `user_id`: User ID (required)
135
+ - `limit` (optional): Number of reports to return (1-100, default: 50)
136
+
137
+ ### Get Report by ID
138
+
139
+ **GET** `/deep_analysis/reports/{report_id}`
140
+
141
+ Retrieves a complete deep analysis report by ID.
142
+
143
+ **Query Parameters:**
144
+ - `user_id` (optional): Ensures report belongs to specified user
145
+
146
+ **Response:**
147
+ ```json
148
+ {
149
+ "report_id": 1,
150
+ "report_uuid": "uuid-string",
151
+ "user_id": 123,
152
+ "goal": "Analyze customer churn patterns",
153
+ "status": "completed",
154
+ "start_time": "2024-01-01T12:00:00Z",
155
+ "end_time": "2024-01-01T12:02:00Z",
156
+ "duration_seconds": 120,
157
+ "deep_questions": "1. What factors contribute to churn?\n2. How does churn vary by segment?",
158
+ "deep_plan": "{\n \"@preprocessing\": {...},\n \"@statistical_analytics\": {...}\n}",
159
+ "summaries": ["Agent performed data cleaning...", "Statistical analysis revealed..."],
160
+ "analysis_code": "import pandas as pd\n# Complete analysis code",
161
+ "plotly_figures": [{"data": [...], "layout": {...}}],
162
+ "synthesis": ["The analysis shows clear patterns..."],
163
+ "final_conclusion": "## Conclusion\nCustomer churn is primarily driven by...",
164
+ "html_report": "<html>...</html>",
165
+ "report_summary": "Analysis of customer churn patterns reveals...",
166
+ "progress_percentage": 100,
167
+ "credits_consumed": 5,
168
+ "error_message": null,
169
+ "model_provider": "anthropic",
170
+ "model_name": "claude-sonnet-4-20250514",
171
+ "total_tokens_used": 15000,
172
+ "estimated_cost": 0.25,
173
+ "steps_completed": ["questions", "planning", "execution", "synthesis", "conclusion"],
174
+ "created_at": "2024-01-01T12:02:00Z",
175
+ "updated_at": "2024-01-01T12:02:00Z"
176
+ }
177
+ ```
178
+
179
+ ### Get Report by UUID
180
+
181
+ **GET** `/deep_analysis/reports/uuid/{report_uuid}`
182
+
183
+ Retrieves a complete deep analysis report by UUID. Same response format as get by ID.
184
+
185
+ ### Delete Report
186
+
187
+ **DELETE** `/deep_analysis/reports/{report_id}`
188
+
189
+ Deletes a deep analysis report.
190
+
191
+ **Query Parameters:**
192
+ - `user_id` (optional): Ensures report belongs to specified user
193
+
194
+ **Response:**
195
+ ```json
196
+ {
197
+ "message": "Report 1 deleted successfully"
198
+ }
199
+ ```
200
+
201
+ ### Update Report Status
202
+
203
+ **PUT** `/deep_analysis/reports/{report_id}/status`
204
+
205
+ Updates the status of a deep analysis report.
206
+
207
+ **Request Body:**
208
+ ```json
209
+ {
210
+ "status": "completed"
211
+ }
212
+ ```
213
+
214
+ **Valid Status Values:**
215
+ - `pending`: Analysis queued but not started
216
+ - `running`: Analysis in progress
217
+ - `completed`: Analysis finished successfully
218
+ - `failed`: Analysis encountered errors
219
+
220
+ ### Get HTML Report
221
+
222
+ **GET** `/deep_analysis/reports/uuid/{report_uuid}/html`
223
+
224
+ Retrieves only the HTML report content for a specific analysis.
225
+
226
+ **Query Parameters:**
227
+ - `user_id` (optional): Ensures report belongs to specified user
228
+
229
+ **Response:**
230
+ ```json
231
+ {
232
+ "html_report": "<html>...</html>",
233
+ "filename": "deep_analysis_report_20240101_120200.html"
234
+ }
235
+ ```
236
+
237
+ ### Download HTML Report
238
+
239
+ **POST** `/deep_analysis/download_from_db/{report_uuid}`
240
+
241
+ Downloads the HTML report as a file attachment.
242
+
243
+ **Query Parameters:**
244
+ - `user_id` (optional): Ensures report belongs to specified user
245
+
246
+ **Response:**
247
+ - Content-Type: `text/html; charset=utf-8`
248
+ - Content-Disposition: `attachment; filename="deep_analysis_report_TIMESTAMP.html"`
249
+
250
+ ---
251
+
252
+ ## Deep Analysis Module Architecture
253
+
254
+ ### DSPy Signatures
255
+
256
+ The system uses several DSPy signatures for different analysis phases:
257
+
258
+ #### 1. `deep_questions`
259
+ Generates 5 targeted analytical questions based on the user's goal and dataset structure.
260
+
261
+ #### 2. `deep_planner`
262
+ Creates an optimized execution plan using the user's active templates/agents. The planner:
263
+ - Verifies feasibility using available datasets and agent descriptions
264
+ - Batches similar questions per agent call for efficiency
265
+ - Reuses outputs across questions to minimize agent calls
266
+ - Defines clear variable flow and dependencies between agents
267
+
268
+ #### 3. `deep_code_synthesizer`
269
+ Combines and optimizes code from multiple agents:
270
+ - Fixes errors and inconsistencies between agent outputs
271
+ - Ensures proper data flow and type handling
272
+ - Converts all visualizations to Plotly format
273
+ - Adds comprehensive error handling and validation
274
+
275
+ #### 4. `deep_synthesizer`
276
+ Synthesizes analysis results into coherent insights and findings.
277
+
278
+ #### 5. `final_conclusion`
279
+ Generates final conclusions and strategic recommendations based on all analysis results.
280
+
281
+ ### Streaming Analysis
282
+
283
+ The `execute_deep_analysis_streaming` method provides real-time progress updates:
284
+
285
+ ```python
286
+ async for update in deep_analysis.execute_deep_analysis_streaming(goal, dataset_info, session_df):
287
+     if update["step"] == "questions":
288
+         ...  # handle question-generation progress
289
+     elif update["step"] == "planning":
290
+         ...  # handle planning progress
291
+     elif update["step"] == "agent_execution":
292
+         ...  # handle agent-execution progress
293
+     # ... handle other steps
294
+ ```
295
+
296
+ ### Integration with User Templates
297
+
298
+ The deep analysis system integrates with user templates in several ways:
299
+
300
+ 1. **Agent Discovery**: Retrieves user's active template preferences from the database
301
+ 2. **Dynamic Planning**: The planner uses available agents to create optimal execution plans
302
+ 3. **Template Validation**: Ensures all referenced agents exist in the user's active templates
303
+ 4. **Fallback Handling**: Uses default agents if user preferences are incomplete
304
+ 5. **Performance Optimization**: Respects template limits for efficient execution
305
+
306
+ ### Error Handling
307
+
308
+ The system includes comprehensive error handling:
309
+
310
+ - **Code Execution Errors**: Automatically attempts to fix and retry failed code
311
+ - **Template Missing**: Falls back to default agents if user templates are unavailable
312
+ - **Timeout Protection**: Includes timeouts for long-running operations
313
+ - **Memory Management**: Handles large datasets and visualization efficiently
314
+ - **Unicode Handling**: Cleans problematic characters that might cause encoding issues
315
+
316
+ ### Visualization Integration
317
+
318
+ All visualizations are standardized to Plotly format:
319
+ - Consistent styling and color schemes
320
+ - Interactive features (zoom, pan, hover)
321
+ - Accessibility compliance (colorblind-friendly palettes)
322
+ - Export capabilities for reports
323
+ - Responsive design for different screen sizes
324
+
325
+ ---
326
+
327
+ ## Frontend Integration
328
+
329
+ The deep analysis system includes React components for:
330
+
331
+ - **DeepAnalysisSidebar**: Main interface for starting and managing analyses
332
+ - **NewAnalysisForm**: Form for initiating new deep analyses
333
+ - **CurrentAnalysisView**: Real-time progress tracking during analysis
334
+ - **HistoryView**: Browse and access historical analysis reports
335
+ - **AnalysisStep**: Individual step progress visualization
336
+
337
+ The frontend integrates with the streaming API to provide real-time feedback and uses the user's active template configuration for personalized analysis capabilities.
338
+
339
+ ## Credit and Cost Tracking
340
+
341
+ The system tracks detailed usage metrics:
342
+ - **Credits Consumed**: Number of credits deducted from user account
343
+ - **Token Usage**: Total tokens used across all model calls
344
+ - **Estimated Cost**: Dollar cost estimate based on model pricing
345
+ - **Model Information**: Provider and model name used for analysis
346
+ - **Execution Time**: Duration of analysis for performance monitoring
347
+
348
+ This information helps users understand resource consumption and optimize their analysis strategies.
docs/api/routes/feedback.md ADDED
@@ -0,0 +1,153 @@
1
+ # Feedback Routes Documentation
2
+
3
+ This document describes the API endpoints available for managing user feedback on AI-generated messages in the Auto-Analyst backend.
4
+
5
+ ## Base URL
6
+
7
+ All feedback-related endpoints are prefixed with `/feedback`.
8
+
9
+ ## Endpoints
10
+
11
+ ### Create or Update Message Feedback
12
+ Creates new feedback or updates existing feedback for a specific message.
13
+
14
+ **Endpoint:** `POST /feedback/message/{message_id}`
15
+
16
+ **Path Parameters:**
17
+ - `message_id`: ID of the message to provide feedback for
18
+
19
+ **Request Body:**
20
+ ```json
21
+ {
22
+ "rating": 5, // Required: Star rating (1-5)
23
+ "model_name": "gpt-4o-mini", // Optional: Model used for the message
24
+ "model_provider": "openai", // Optional: Provider of the model
25
+ "temperature": 0.7, // Optional: Temperature setting
26
+ "max_tokens": 6000 // Optional: Max tokens setting
27
+ }
28
+ ```
29
+
30
+ **Response:**
31
+ ```json
32
+ {
33
+ "feedback_id": 123,
34
+ "message_id": 456,
35
+ "rating": 5,
36
+ "feedback_comment": null,
37
+ "model_name": "gpt-4o-mini",
38
+ "model_provider": "openai",
39
+ "temperature": 0.7,
40
+ "max_tokens": 6000,
41
+ "created_at": "2023-05-01T12:00:00Z",
42
+ "updated_at": "2023-05-01T12:00:00Z"
43
+ }
44
+ ```
45
+
46
+ **Error Responses:**
47
+ - `404 Not Found`: Message with specified ID not found
48
+ - `500 Internal Server Error`: Failed to create/update feedback
49
+
50
+ ### Get Message Feedback
51
+ Retrieves feedback for a specific message.
52
+
53
+ **Endpoint:** `GET /feedback/message/{message_id}`
54
+
55
+ **Path Parameters:**
56
+ - `message_id`: ID of the message to get feedback for
57
+
58
+ **Response:**
59
+ ```json
60
+ {
61
+ "feedback_id": 123,
62
+ "message_id": 456,
63
+ "rating": 5,
64
+ "feedback_comment": null,
65
+ "model_name": "gpt-4o-mini",
66
+ "model_provider": "openai",
67
+ "temperature": 0.7,
68
+ "max_tokens": 6000,
69
+ "created_at": "2023-05-01T12:00:00Z",
70
+ "updated_at": "2023-05-01T12:00:00Z"
71
+ }
72
+ ```
73
+
74
+ **Error Responses:**
75
+ - `404 Not Found`: No feedback found for the specified message
76
+ - `500 Internal Server Error`: Failed to retrieve feedback
77
+
78
+ ### Get Chat Feedback
79
+ Retrieves all feedback for messages in a specific chat.
80
+
81
+ **Endpoint:** `GET /feedback/chat/{chat_id}`
82
+
83
+ **Path Parameters:**
84
+ - `chat_id`: ID of the chat to get feedback for
85
+
86
+ **Response:**
87
+ ```json
88
+ [
89
+ {
90
+ "feedback_id": 123,
91
+ "message_id": 456,
92
+ "rating": 5,
93
+ "feedback_comment": null,
94
+ "model_name": "gpt-4o-mini",
95
+ "model_provider": "openai",
96
+ "temperature": 0.7,
97
+ "max_tokens": 6000,
98
+ "created_at": "2023-05-01T12:00:00Z",
99
+ "updated_at": "2023-05-01T12:00:00Z"
100
+ }
101
+ ]
102
+ ```
103
+
104
+ **Note:** Returns an empty array if no feedback exists for the chat.
105
+
106
+ **Error Responses:**
107
+ - `500 Internal Server Error`: Failed to retrieve chat feedback
108
+
109
+ ## Feedback Features
110
+
111
+ ### Rating System
112
+ - **Scale:** 1-5 star rating system
113
+ - **Required:** Rating is the only required field for feedback
114
+ - **Purpose:** Allows users to rate the quality of AI responses
115
+
116
+ ### Model Context Tracking
117
+ The system optionally tracks:
118
+ - **Model Name:** The specific AI model used (e.g., "gpt-4o-mini")
119
+ - **Model Provider:** The provider of the model (e.g., "openai", "anthropic")
120
+ - **Temperature:** The creativity/randomness setting used
121
+ - **Max Tokens:** The maximum response length setting
122
+
123
+ ### Update Behavior
124
+ - **Upsert Operation:** The POST endpoint either creates new feedback or updates existing feedback
125
+ - **Partial Updates:** When updating, only provided fields are modified
126
+ - **Timestamp Tracking:** Both creation and update timestamps are maintained
127
+
128
+ ## Data Management
129
+
130
+ ### Database Operations
131
+ - **Atomic Operations:** Feedback creation/updates are handled in database transactions
132
+ - **Referential Integrity:** Feedback is linked to specific messages via foreign keys
133
+ - **Soft Handling:** Missing optional fields are handled gracefully
134
+
135
+ ### Error Handling
136
+ - **Comprehensive Logging:** All operations are logged for debugging
137
+ - **User-Friendly Messages:** Error responses provide clear information
138
+ - **Transaction Safety:** Failed operations are rolled back to maintain data consistency
139
+
140
+ ## Usage Patterns
141
+
142
+ ### Typical Workflow
143
+ 1. User receives an AI-generated message
144
+ 2. User provides rating (1-5 stars) via the frontend
145
+ 3. Frontend calls `POST /feedback/message/{message_id}` with the rating and model context (sketched after this list)
146
+ 4. System stores or updates the feedback
147
+ 5. Feedback can be retrieved later for analytics or user review
148
+
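+ A hedged sketch of step 3, assuming a local dev server:
+
+ ```python
+ # Posting a 5-star rating for message 456; the base URL is an assumption.
+ import requests
+
+ resp = requests.post(
+     "http://localhost:8000/feedback/message/456",
+     json={"rating": 5, "model_name": "gpt-4o-mini", "model_provider": "openai"},
+ )
+ print(resp.json()["feedback_id"])
+ ```
+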
149
+ ### Analytics Integration
150
+ Feedback data is used by the analytics system to:
151
+ - Track model performance across different configurations
152
+ - Identify patterns in user satisfaction
153
+ - Generate insights for model optimization
docs/api/routes/session.md ADDED
@@ -0,0 +1,273 @@
1
+ # **Auto-Analyst API Documentation**
2
+
3
+ The core application routes are designed to manage the data and AI analysis capabilities of the Auto-Analyst application.
4
+
5
+ ## **1. Core Application Routes**
6
+ ### **Data Management**
7
+
8
+ #### **POST /upload_dataframe**
9
+ Uploads a CSV dataset for analysis.
10
+ **Request:**
11
+ - `file`: CSV file
12
+ - `name`: Dataset name
13
+ - `description`: Dataset description
14
+ **Headers:**
15
+ - `X-Force-Refresh`: "true" (optional) - Forces session reset before upload
16
+ **Response:**
17
+ ```json
18
+ { "message": "Dataframe uploaded successfully", "session_id": "abc123" }
19
+ ```
20
+
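+ A hedged upload sketch using `requests` (the base URL is an assumption):
+
+ ```python
+ # Uploads a CSV and forces a fresh session via the optional header.
+ import requests
+
+ with open("Housing.csv", "rb") as f:
+     resp = requests.post(
+         "http://localhost:8000/upload_dataframe",
+         files={"file": ("Housing.csv", f, "text/csv")},
+         data={"name": "Housing Dataset", "description": "House listing data"},
+         headers={"X-Force-Refresh": "true"},  # optional session reset
+     )
+ print(resp.json()["session_id"])
+ ```
+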
21
+ #### **POST /upload_excel**
22
+ Uploads an Excel file with a specific sheet for analysis.
23
+ **Request:**
24
+ - `file`: Excel file
25
+ - `name`: Dataset name
26
+ - `description`: Dataset description
27
+ - `sheet_name`: Name of the Excel sheet to use
28
+ **Headers:**
29
+ - `X-Force-Refresh`: "true" (optional) - Forces session reset before upload
30
+ **Response:**
31
+ ```json
32
+ { "message": "Excel file processed successfully", "session_id": "abc123", "sheet": "Sheet1" }
33
+ ```
34
+
35
+ #### **POST /api/excel-sheets**
36
+ Gets the list of sheet names from an Excel file.
37
+ **Request:**
38
+ - `file`: Excel file
39
+ **Response:**
40
+ ```json
41
+ { "sheets": ["Sheet1", "Sheet2", "Data"] }
42
+ ```
43
+
44
+ #### **GET /api/default-dataset**
45
+ Gets the default dataset.
46
+ **Response:**
47
+ ```json
48
+ {
49
+ "headers": ["column1", "column2", ...],
50
+ "rows": [[val1, val2, ...], ...],
51
+ "name": "Housing Dataset",
52
+ "description": "A comprehensive dataset containing housing information..."
53
+ }
54
+ ```
55
+
56
+ #### **POST /reset-session**
57
+ Resets session to default dataset.
58
+ **Request Body:**
59
+ ```json
60
+ {
61
+ "name": "optional name",
62
+ "description": "optional description",
63
+ "preserveModelSettings": false
64
+ }
65
+ ```
66
+ **Response:**
67
+ ```json
68
+ {
69
+ "message": "Session reset to default dataset",
70
+ "session_id": "abc123",
71
+ "dataset": "Housing.csv"
72
+ }
73
+ ```
74
+
75
+ #### **GET /api/preview-csv** / **POST /api/preview-csv**
76
+ Preview the current dataset in the session.
77
+ **Response:**
78
+ ```json
79
+ {
80
+ "headers": ["column1", "column2", ...],
81
+ "rows": [[val1, val2, ...], ...],
82
+ "name": "Dataset Name",
83
+ "description": "Dataset description..."
84
+ }
85
+ ```
86
+
87
+ ---
88
+
89
+ ### **2. AI Analysis**
90
+
91
+ #### **POST /chat/{agent_name}**
92
+ Processes a query using a specific AI agent.
93
+ **Path Parameters:** `agent_name`
94
+ **Request Body:**
95
+ ```json
96
+ { "query": "Analyze the relationship between price and size" }
97
+ ```
98
+ **Query Parameters:** `user_id` (optional), `chat_id` (optional)
99
+ **Response:**
100
+ ```json
101
+ {
102
+ "agent_name": "data_viz_agent",
103
+ "query": "Analyze the relationship between price and size",
104
+ "response": "# Analysis\n\nThere appears to be a strong positive correlation...",
105
+ "session_id": "abc123"
106
+ }
107
+ ```
108
+
109
+ #### **POST /chat**
110
+ Processes a query using multiple AI agents with streaming responses.
111
+ **Request Body:**
112
+ ```json
113
+ { "query": "Analyze the housing data" }
114
+ ```
115
+ **Query Parameters:** `user_id` (optional), `chat_id` (optional)
116
+ **Response:** *Streaming JSON objects:*
117
+ ```json
118
+ {"agent": "data_viz_agent", "content": "# Visualization\n\n...", "status": "success"}
119
+ {"agent": "statistical_analytics_agent", "content": "# Statistical Analysis\n\n...", "status": "success"}
120
+ ```
121
+
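+ A consumption sketch, assuming newline-delimited JSON framing (the exact framing is an assumption):
+
+ ```python
+ # Streams per-agent results as they arrive; the base URL is assumed.
+ import json
+ import requests
+
+ resp = requests.post("http://localhost:8000/chat",
+                      json={"query": "Analyze the housing data"}, stream=True)
+ for line in resp.iter_lines():
+     if line:
+         chunk = json.loads(line)
+         print(chunk["agent"], chunk["status"])
+ ```
+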
122
+ #### **POST /chat_history_name**
123
+ Generates a name for a chat based on the query.
124
+ **Request Body:**
125
+ ```json
126
+ { "query": "Analyze sales data for Q4" }
127
+ ```
128
+ **Response:**
129
+ ```json
130
+ { "name": "Chat about sales data analysis" }
131
+ ```
132
+
133
+ #### **GET /agents**
134
+ Lists available AI agents.
135
+ **Response:**
136
+ ```json
137
+ {
138
+ "available_agents": ["data_viz_agent", "sk_learn_agent", "statistical_analytics_agent", "preprocessing_agent"],
139
+ "standard_agents": ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"],
140
+ "template_agents": ["custom_template_1", "custom_template_2"],
141
+ "custom_agents": []
142
+ }
143
+ ```
144
+
145
+ ---
146
+
147
+ ### **3. Deep Analysis**
148
+
149
+ #### **POST /deep_analysis_streaming**
150
+ Performs comprehensive deep analysis with real-time streaming updates.
151
+ **Request Body:**
152
+ ```json
153
+ { "goal": "Perform comprehensive analysis of the sales data" }
154
+ ```
155
+ **Query Parameters:** `user_id` (optional), `chat_id` (optional)
156
+ **Response:** *Streaming JSON objects with progress updates*
157
+
158
+ #### **POST /deep_analysis/download_report**
159
+ Downloads an HTML report from deep analysis results.
160
+ **Request Body:**
161
+ ```json
162
+ {
163
+ "analysis_data": { ... },
164
+ "report_uuid": "optional-uuid"
165
+ }
166
+ ```
167
+ **Response:** HTML file download
168
+
169
+ ---
170
+
171
+ ### **4. Model Settings**
172
+
173
+ #### **GET /api/model-settings**
174
+ Fetches current model settings.
175
+ **Response:**
176
+ ```json
177
+ {
178
+ "provider": "openai",
179
+ "model": "gpt-4o-mini",
180
+ "hasCustomKey": true,
181
+ "temperature": 1.0,
182
+ "maxTokens": 6000
183
+ }
184
+ ```
185
+
186
+ #### **POST /settings/model**
187
+ Updates model settings.
188
+ **Request Body:**
189
+ ```json
190
+ {
191
+ "provider": "openai",
192
+ "model": "gpt-4",
193
+ "api_key": "sk-...",
194
+ "temperature": 0.7,
195
+ "max_tokens": 8000
196
+ }
197
+ ```
198
+ **Response:**
199
+ ```json
200
+ { "message": "Model settings updated successfully" }
201
+ ```
202
+
203
+ ---
204
+
205
+ ### **5. Session Management**
206
+
207
+ #### **GET /api/session-info**
208
+ Gets information about the current session.
209
+ **Response:**
210
+ ```json
211
+ {
212
+ "session_id": "abc123",
213
+ "dataset_name": "Housing Dataset",
214
+ "dataset_description": "...",
215
+ "model_config": { ... }
216
+ }
217
+ ```
218
+
219
+ #### **POST /set-message-info**
220
+ Associates message tracking information with the session.
221
+ **Request Body:**
222
+ ```json
223
+ {
224
+ "chat_id": 123,
225
+ "message_id": 456,
226
+ "user_id": 789
227
+ }
228
+ ```
229
+
230
+ #### **POST /create-dataset-description**
231
+ Creates an AI-generated description for a dataset.
232
+ **Request Body:**
233
+ ```json
234
+ {
235
+ "df_preview": "column1,column2\nvalue1,value2\n...",
236
+ "name": "Dataset Name"
237
+ }
238
+ ```
239
+
240
+ ---
241
+
242
+ ### **6. System Endpoints**
243
+
244
+ #### **GET /**
245
+ Returns API welcome information and feature list.
246
+
247
+ #### **GET /health**
248
+ Health check endpoint.
249
+ **Response:**
250
+ ```json
251
+ { "message": "API is healthy and running" }
252
+ ```
253
+
254
+ ---
255
+
258
+ ### **7. Authentication & Session Management**
259
+ - **Session ID Sources:**
260
+ - Query parameter: `session_id`
261
+ - Header: `X-Session-ID`
262
+ - Auto-generated if not provided
263
+ - **Session State Includes:**
264
+ - Current dataset
265
+ - AI system instance
266
+ - Model configuration
267
+ - User and chat associations
268
+
269
+ ### **8. Error Handling**
270
+ - Comprehensive error handling with appropriate HTTP status codes
271
+ - Detailed error messages for debugging
272
+ - Fallback encoding support for CSV files (UTF-8, unicode_escape, ISO-8859-1)
273
+ - Session state preservation during errors
docs/api/routes/templates.md ADDED
@@ -0,0 +1,363 @@
1
+ # Templates and Agent Loading Documentation
2
+
3
+ This document describes how the Auto-Analyst template system works, including agent loading, user preferences, and template management.
4
+
5
+ ## Overview
6
+
7
+ The Auto-Analyst system uses a flexible template-based approach for managing AI agents. Templates define specialized agents with specific capabilities, and users can customize which agents are available for their analysis workflows.
8
+
9
+ ## Template System Architecture
10
+
11
+ ### Template Types
12
+
13
+ Templates come in different **variant types** that determine how they can be used:
14
+
15
+ - **`individual`**: Templates available for single-agent queries (e.g., `@preprocessing_agent`)
16
+ - **`planner`**: Templates available for multi-agent planning workflows
17
+ - **`both`**: Templates available in both individual and planner contexts
18
+
19
+ ### Default Agents
20
+
21
+ The system includes four core default agents that are **enabled by default** for all users:
22
+
23
+ **For Individual Use:**
24
+ - `preprocessing_agent`: Data cleaning and preprocessing
25
+ - `statistical_analytics_agent`: Statistical analysis and insights
26
+ - `sk_learn_agent`: Machine learning with scikit-learn
27
+ - `data_viz_agent`: Data visualization with Plotly
28
+
29
+ **For Planner Use:**
30
+ - `planner_preprocessing_agent`: Planning version of preprocessing agent
31
+ - `planner_statistical_analytics_agent`: Planning version of statistical agent
32
+ - `planner_sk_learn_agent`: Planning version of ML agent
33
+ - `planner_data_viz_agent`: Planning version of visualization agent
34
+
35
+ ## Template Management Endpoints
36
+
37
+ ### Get All Templates
38
+
39
+ **Endpoint:** `GET /templates/`
40
+
41
+ **Query Parameters:**
42
+ - `variant_type`: Filter by `"individual"`, `"planner"`, or `"all"` (default: `"all"`)
43
+
44
+ **Response:**
45
+ ```json
46
+ [
47
+ {
48
+ "template_id": 1,
49
+ "template_name": "preprocessing_agent",
50
+ "display_name": "Data Preprocessing Agent",
51
+ "description": "Handles data cleaning, missing values, and preprocessing tasks",
52
+ "prompt_template": "You are a data preprocessing specialist...",
53
+ "template_category": "Data Processing",
54
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
55
+ "is_premium_only": false,
56
+ "is_active": true,
57
+ "usage_count": 12,
58
+ "created_at": "2023-05-01T12:00:00Z",
59
+ "updated_at": "2023-05-01T12:00:00Z"
60
+ }
61
+ ]
62
+ ```
63
+
64
+ ### Get Templates by Category
65
+
66
+ **Endpoint:** `GET /templates/categories`
67
+
68
+ **Query Parameters:**
69
+ - `variant_type`: Filter by `"individual"`, `"planner"`, or `"all"` (default: `"individual"`)
70
+
71
+ **Response:**
72
+ ```json
73
+ [
74
+ {
75
+ "category": "Data Processing",
76
+ "templates": [
77
+ {
78
+ "agent_id": 1,
79
+ "agent_name": "preprocessing_agent",
80
+ "display_name": "Data Preprocessing Agent",
81
+ "description": "Handles data cleaning and preprocessing",
82
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
83
+ "usage_count": 1234
84
+ }
85
+ ]
86
+ }
87
+ ]
88
+ ```
89
+
90
+ ### Get Template by ID
91
+
92
+ **Endpoint:** `GET /templates/template/{template_id}`
93
+
94
+ **Response:**
95
+ ```json
96
+ {
97
+ "template_id": 1,
98
+ "template_name": "preprocessing_agent",
99
+ "display_name": "Data Preprocessing Agent",
100
+ "description": "Handles data cleaning, missing values, and preprocessing tasks",
101
+ "prompt_template": "You are a data preprocessing specialist...",
102
+ "template_category": "Data Processing",
103
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
104
+ "is_premium_only": false,
105
+ "is_active": true,
106
+ "usage_count": 1234,
107
+ "created_at": "2023-05-01T12:00:00Z",
108
+ "updated_at": "2023-05-01T12:00:00Z"
109
+ }
110
+ ```
111
+
112
+ ### Get Template Categories List
113
+
114
+ **Endpoint:** `GET /templates/categories/list`
115
+
116
+ **Response:**
117
+ ```json
118
+ {
119
+ "categories": [
120
+ "Data Processing",
121
+ "Machine Learning",
122
+ "Visualization",
123
+ "Statistics"
124
+ ]
125
+ }
126
+ ```
127
+
128
+ ### Get Templates by Specific Category
129
+
130
+ **Endpoint:** `GET /templates/category/{category}`
131
+
132
+ **Path Parameters:**
133
+ - `category`: Name of the category to filter by
134
+
135
+ **Response:**
136
+ ```json
137
+ [
138
+ {
139
+ "template_id": 1,
140
+ "template_name": "preprocessing_agent",
141
+ "display_name": "Data Preprocessing Agent",
142
+ "description": "Handles data cleaning and preprocessing",
143
+ "template_category": "Data Processing",
144
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
145
+ "usage_count": 1234
146
+ }
147
+ ]
148
+ ```
149
+
150
+ ## User Template Preferences
151
+
152
+ ### How Agent Loading Works for Users
153
+
154
+ 1. **Default Behavior**: New users automatically have the 4 core default agents enabled
155
+ 2. **Custom Preferences**: Users can enable/disable additional templates through preferences
156
+ 3. **Variant-Specific**: Individual and planner variants are managed separately
157
+ 4. **Usage Tracking**: System tracks which templates users actually use
158
+
159
+ ### Get User Template Preferences
160
+
161
+ **Endpoint:** `GET /templates/user/{user_id}`
162
+
163
+ **Query Parameters:**
164
+ - `variant_type`: Filter by `"individual"`, `"planner"`, or `"all"` (default: `"planner"`)
165
+
166
+ **Response:**
167
+ ```json
168
+ [
169
+ {
170
+ "template_id": 1,
171
+ "template_name": "preprocessing_agent",
172
+ "display_name": "Data Preprocessing Agent",
173
+ "description": "Handles data cleaning and preprocessing",
174
+ "template_category": "Data Processing",
175
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
176
+ "is_premium_only": false,
177
+ "is_active": true,
178
+ "is_enabled": true,
179
+ "usage_count": 15,
180
+ "last_used_at": "2023-05-01T12:00:00Z",
181
+ "created_at": "2023-04-01T12:00:00Z",
182
+ "updated_at": "2023-05-01T12:00:00Z"
183
+ }
184
+ ]
185
+ ```
186
+
187
+ ### Get Only Enabled Templates
188
+
189
+ **Endpoint:** `GET /templates/user/{user_id}/enabled`
190
+
191
+ Returns only templates that are currently enabled for the user.
192
+
193
+ ### Get Enabled Templates for Planner
194
+
195
+ **Endpoint:** `GET /templates/user/{user_id}/enabled/planner`
196
+
197
+ Returns enabled planner templates with the following restrictions:
198
+ - **Maximum 10 templates** for planner use
199
+ - **Sorted by usage** (most used first)
200
+ - **Only planner variants** (`planner` or `both` types)
201
+
202
+ ## Template Preference Management
203
+
204
+ ### Toggle Single Template
205
+
206
+ **Endpoint:** `POST /templates/user/{user_id}/template/{template_id}/toggle`
207
+
208
+ **Request Body:**
209
+ ```json
210
+ {
211
+ "is_enabled": true
212
+ }
213
+ ```
214
+
215
+ **Restrictions** (a validation sketch follows this list):
216
+ - Cannot disable all templates (at least 1 must remain enabled)
217
+ - Cannot enable more than 10 templates for planner use
218
+
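+ A minimal sketch of those rules, assuming the server tracks a per-user count of enabled templates:
+
+ ```python
+ # Toggle validation sketch; the counting mechanism is an assumption.
+ def validate_toggle(enabled_count: int, enabling: bool, planner_variant: bool) -> None:
+     if not enabling and enabled_count <= 1:
+         raise ValueError("At least one template must remain enabled")
+     if enabling and planner_variant and enabled_count >= 10:
+         raise ValueError("No more than 10 planner templates may be enabled")
+ ```
+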
219
+ ### Bulk Toggle Templates
220
+
221
+ **Endpoint:** `POST /templates/user/{user_id}/bulk-toggle`
222
+
223
+ **Request Body:**
224
+ ```json
225
+ {
226
+ "preferences": [
227
+ {
228
+ "template_id": 1,
229
+ "is_enabled": true
230
+ },
231
+ {
232
+ "template_id": 2,
233
+ "is_enabled": false
234
+ }
235
+ ]
236
+ }
237
+ ```
238
+
239
+ **Response:**
240
+ ```json
241
+ {
242
+ "results": [
243
+ {
244
+ "template_id": 1,
245
+ "success": true,
246
+ "message": "Template enabled successfully",
247
+ "is_enabled": true
248
+ }
249
+ ]
250
+ }
251
+ ```
252
+
253
+ ## Template Categories and Icons
254
+
255
+ ### Available Categories
256
+
257
+ Templates are organized into categories such as:
258
+ - **Data Processing**: Preprocessing, cleaning, feature engineering
259
+ - **Machine Learning**: Various ML frameworks and algorithms
260
+ - **Visualization**: Plotting and chart generation
261
+ - **Statistics**: Statistical analysis and modeling
262
+ - **Custom**: User or organization-specific templates
263
+
264
+ ### Icon System
265
+
266
+ Templates include visual icons stored in `/public/icons/templates/`:
267
+
268
+ **Core Agent Icons:**
269
+ - `preprocessing_agent.svg`: Data preprocessing
270
+ - `sk_learn_agent.svg`: Machine learning
271
+ - `matplotlib_agent.png`: Plotting with matplotlib
272
+ - `polars_agent.svg`: Data manipulation with Polars
273
+
274
+ **Library-Specific Icons:**
275
+ - `numpy.svg`, `scipy.png`: Scientific computing
276
+ - `plotly.svg`, `seaborn.svg`: Advanced visualization
277
+ - `lightgbm.png`, `xgboost.png`: Gradient boosting
278
+ - `pymc.png`, `statsmodel.svg`: Statistical modeling
279
+
280
+ **Special Purpose Icons:**
281
+ - `data-cleaning.png`: Data cleaning workflows
282
+ - `feature-engineering.png`: Feature engineering tasks
283
+
284
+ ## Agent Loading Process
285
+
286
+ ### For Individual Queries
287
+
288
+ When a user makes a query like `@preprocessing_agent analyze my data`:
289
+
290
+ 1. **Check User Preferences**: System looks up user's enabled individual templates
291
+ 2. **Apply Defaults**: If no preference exists, default agents are enabled
292
+ 3. **Load Agent**: System loads the specific agent template and executes the query
293
+ 4. **Track Usage**: Usage count is incremented for analytics
294
+
295
+ ### For Planner Workflows
296
+
297
+ When a user makes a general query that triggers the planner (a selection sketch follows this list):
298
+
299
+ 1. **Get Enabled Planner Templates**: System queries user's enabled planner variants
300
+ 2. **Apply 10-Template Limit**: Maximum 10 templates for performance
301
+ 3. **Sort by Usage**: Most-used templates get priority
302
+ 4. **Create Plan**: Planner selects appropriate agents for the analysis
303
+ 5. **Execute Workflow**: Selected agents execute in sequence
304
+ 6. **Update Usage**: Usage statistics updated for selected agents
305
+
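+ Steps 1–3 can be sketched as a simple filter-and-sort; the dict keys are assumed field names based on the preference schema shown earlier:
+
+ ```python
+ # Planner template selection sketch; field names are assumptions.
+ def select_planner_templates(prefs: list[dict], limit: int = 10) -> list[dict]:
+     enabled = [p for p in prefs if p.get("is_enabled")]
+     # most-used templates get priority, capped at the 10-template limit
+     return sorted(enabled, key=lambda p: p.get("usage_count", 0), reverse=True)[:limit]
+ ```
+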
306
+ ### Default Agent Behavior
307
+
308
+ ```python
309
+ # Default agents enabled for new users
310
+ individual_defaults = [
311
+     "preprocessing_agent",
312
+     "statistical_analytics_agent",
313
+     "sk_learn_agent",
314
+     "data_viz_agent",
315
+ ]
316
+
317
+ planner_defaults = [
318
+     "planner_preprocessing_agent",
319
+     "planner_statistical_analytics_agent",
320
+     "planner_sk_learn_agent",
321
+     "planner_data_viz_agent",
322
+ ]
323
+ ```
324
+
325
+ ## Usage Analytics
326
+
327
+ ### Global Usage Tracking
328
+
329
+ The system tracks global usage statistics across all users:
330
+ - **Total usage count** per template
331
+ - **User-specific usage** for personalization
332
+ - **Last used timestamps** for sorting
333
+
334
+ ### Usage-Based Features
335
+
336
+ - **Template Recommendations**: Popular templates shown first
337
+ - **Personalized Ordering**: User's most-used templates prioritized
338
+ - **Analytics Dashboard**: Usage patterns for administrators
339
+
340
+ ## Template Restrictions
341
+
342
+ ### User Limitations
343
+
344
+ - **Minimum 1 Agent**: Cannot disable all templates
345
+ - **Maximum 10 for Planner**: Performance optimization
346
+ - **Premium Templates**: Some templates require premium access
347
+
348
+ ### System Limitations
349
+
350
+ - **Active Templates Only**: Inactive templates not available
351
+ - **Variant Compatibility**: Individual/planner variants managed separately
352
+ - **Category Organization**: Templates must belong to valid categories
353
+
354
+ ## Integration with Deep Analysis
355
+
356
+ The deep analysis system uses the template preference system:
357
+
358
+ 1. **Load User Preferences**: Gets enabled planner templates for user
359
+ 2. **Create Agent Pool**: Instantiates agents from enabled templates
360
+ 3. **Execute Analysis**: Uses available agents for comprehensive analysis
361
+ 4. **Fallback Behavior**: Uses default agents if no preferences found
362
+
363
+ This ensures users get personalized deep analysis based on their template preferences while maintaining system performance through the 10-template limit.
docs/architecture/architecture.md ADDED
@@ -0,0 +1,427 @@
1
+ # Auto-Analyst Backend System Architecture
2
+
3
+ ## Overview
4
+
5
+ Auto-Analyst is a sophisticated multi-agent AI platform designed for comprehensive data analysis. The backend system orchestrates specialized AI agents, manages user sessions, and provides a robust API for data processing and analysis workflows.
6
+
7
+ ## 🏗️ High-Level Architecture
8
+
9
+ ```
10
+ ┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
11
+ │    Frontend     │    │     Backend      │    │    Database     │
12
+ │    (Next.js)    │◄──►│    (FastAPI)     │◄──►│  (PostgreSQL/   │
13
+ │                 │    │                  │    │     SQLite)     │
14
+ └─────────────────┘    └──────────────────┘    └─────────────────┘
15
+                                 │
16
+                                 ▼
17
+                        ┌──────────────────┐
18
+                        │    AI Models     │
19
+                        │   (DSPy/LLMs)    │
20
+                        └──────────────────┘
21
+                                 │
22
+                                 ▼
23
+                        ┌──────────────────┐
24
+                        │   Agent System   │
25
+                        │   [Processing]   │
26
+                        │   [Analytics]    │
27
+                        │ [Visualization]  │
28
+                        └──────────────────┘
29
+ ```
30
+
31
+ ## 🎯 Core Components
32
+
33
+ ### 1. Application Layer (`app.py`)
34
+
35
+ **FastAPI Application Server**
36
+ - **Role**: Main HTTP server and request router
37
+ - **Responsibilities**:
38
+ - Request/response handling
39
+ - Session-based authentication
40
+ - Route registration and middleware
41
+ - Error handling and logging
42
+ - Static file serving
43
+ - CORS configuration
44
+
45
+ **Key Features**:
46
+ - Async/await support for high concurrency
47
+ - Automatic API documentation generation
48
+ - Request validation with Pydantic
49
+ - Session management for user tracking
50
+
51
+ ### 2. Agent System (`src/agents/`)
52
+
53
+ **Multi-Agent Orchestra**
54
+ - **Core Agents**: Specialized AI agents for different analysis tasks
55
+ - **Deep Analysis**: Advanced multi-agent coordination system
56
+ - **Template System**: User-customizable agent configurations
57
+
58
+ #### Agent Types
59
+
60
+ 1. **Individual Agents** (`agents.py`):
61
+ ```python
62
+ - preprocessing_agent # Data cleaning and preparation
63
+ - statistical_analytics_agent # Statistical analysis
64
+ - sk_learn_agent # Machine learning with scikit-learn
65
+ - data_viz_agent # Data visualization
66
+ - basic_qa_agent # General Q&A
67
+ ```
68
+
69
+ 2. **Planner Agents** (Multi-agent coordination):
70
+ ```python
71
+ - planner_preprocessing_agent
72
+ - planner_statistical_analytics_agent
73
+ - planner_sk_learn_agent
74
+ - planner_data_viz_agent
75
+ ```
76
+
77
+ 3. **Deep Analysis System** (`deep_agents.py`):
78
+ ```python
79
+ - deep_questions # Question generation
80
+ - deep_planner # Execution planning
81
+ - deep_code_synthesizer # Code combination
82
+ - deep_synthesizer # Result synthesis
83
+ - final_conclusion # Report generation
84
+ ```
85
+
86
+ #### Agent Architecture Pattern
87
+
88
+ ```python
89
+ class AgentSignature(dspy.Signature):
90
+     """Agent description and purpose"""
91
+     goal = dspy.InputField(desc="Analysis objective")
92
+     dataset = dspy.InputField(desc="Dataset information")
93
+     plan_instructions = dspy.InputField(desc="Execution plan")
94
+
95
+     summary = dspy.OutputField(desc="Analysis summary")
96
+     code = dspy.OutputField(desc="Generated code")
97
+ ```
98
+
99
+ ### 3. Database Layer (`src/db/`)
100
+
101
+ **Data Persistence and Management**
102
+
103
+ #### Database Models (`schemas/models.py`):
104
+
105
+ ```python
106
+ # Core Models
107
+ User # User accounts and authentication
108
+ Chat # Conversation sessions
109
+ Message # Individual messages in chats
110
+ ModelUsage # AI model usage tracking
111
+
112
+ # Template System
113
+ AgentTemplate # Agent definitions and configurations
114
+ UserTemplatePreference # User's enabled/disabled agents
115
+
116
+ # Deep Analysis
117
+ DeepAnalysisReport # Analysis reports and results
118
+
119
+ # Analytics
120
+ CodeExecution # Code execution tracking
121
+ UserAnalytics # User behavior analytics
122
+ ```
123
+
124
+ #### Database Architecture:
125
+
126
+ ```
127
+ Users (1) ──────── (Many) Chats
128
+   │                        │
129
+   │                        ▼
130
+   └─── (Many) ModelUsage ──┘
131
+   │
132
+   └─── (Many) UserTemplatePreference
133
+                        │
134
+                        ▼
135
+                  AgentTemplate
136
+ ```
137
+
138
+ ### 4. Route Handlers (`src/routes/`)
139
+
140
+ **RESTful API Endpoints**
141
+
142
+ | Module | Purpose | Key Endpoints |
143
+ |--------|---------|---------------|
144
+ | `session_routes.py` | Core functionality | `/upload_excel`, `/session_info` |
145
+ | `chat_routes.py` | Chat management | `/chats`, `/messages`, `/delete_chat` |
146
+ | `code_routes.py` | Code operations | `/execute_code`, `/get_latest_code` |
147
+ | `templates_routes.py` | Agent templates | `/templates`, `/user/{id}/enabled` |
148
+ | `deep_analysis_routes.py` | Deep analysis | `/reports`, `/download_from_db` |
149
+ | `analytics_routes.py` | System analytics | `/usage`, `/feedback`, `/costs` |
150
+ | `feedback_routes.py` | User feedback | `/feedback`, `/message/{id}/feedback` |
151
+
152
+ NOTE: Prefix each endpoint with its router's prefix when calling it; for example, the dashboard is reached at `http://localhost:8000/templates/dashboard`.
153
+
154
+ ### 5. Business Logic Layer (`src/managers/`)
155
+
156
+ **Service Layer for Complex Operations**
157
+
158
+ #### Manager Components:
159
+
160
+ 1. **`chat_manager.py`**:
161
+ ```python
162
+ - Session management
163
+ - Message handling
164
+ - Context preservation
165
+ - Agent orchestration
166
+ ```
167
+
168
+ 2. **`ai_manager.py`**:
169
+ ```python
170
+ - Model selection and routing
171
+ - Token tracking and cost calculation
172
+ - Error handling and retries
173
+ - Response formatting
174
+ ```
175
+
176
+ 3. **`session_manager.py`**:
177
+ ```python
178
+ - Session lifecycle management
179
+ - Data sharing between agents
180
+ - Memory management
181
+ - Cleanup operations
182
+ ```
183
+
184
+ ### 6. Utility Layer (`src/utils/`)
185
+
186
+ **Shared Services and Helpers**
187
+
188
+ - **`logger.py`**: Centralized logging system
189
+ - **`generate_report.py`**: HTML report generation
190
+ - **`model_registry.py`**: AI model configuration
191
+
192
+ ## 🔄 Data Flow Architecture
193
+
194
+ ### 1. Request Processing Flow
195
+
196
+ ```
197
+ HTTP Request → FastAPI Router → Route Handler → Manager/Business Logic →
198
+ Database/Agent System → AI Model → Response Processing → JSON Response
199
+ ```
200
+
201
+ ### 2. Agent Execution Flow
202
+
203
+ ```
204
+ User Query → Session Creation → Template Selection → Agent Loading →
205
+ Code Generation → Code Execution → Result Processing → Response Formatting
206
+ ```
207
+
208
+ ### 3. Deep Analysis Flow
209
+
210
+ ```
211
+ Analysis Goal → Question Generation → Planning Phase → Agent Coordination →
212
+ Code Synthesis → Execution → Result Synthesis → Final Report Generation
213
+ ```
214
+
215
+ ### 4. Template System Flow
216
+
217
+ ```
218
+ User Preferences → Template Loading → Agent Registration →
219
+ Capability Mapping → Execution Routing → Usage Tracking
220
+ ```
221
+
222
+ ## 🎨 Design Patterns
223
+
224
+ ### 1. **Module Pattern**
225
+ - Clear separation of concerns
226
+ - Each module has specific responsibilities
227
+ - Minimal dependencies between modules
228
+
229
+ ### 2. **Repository Pattern**
230
+ - Database access abstracted through SQLAlchemy
231
+ - Session management centralized
232
+ - Clean separation of data and business logic
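+ 
+ A minimal sketch of the idea, using the project's `session_factory` and `Chat` model referenced elsewhere in these docs (the helper itself is illustrative):
+ 
+ ```python
+ from src.db.init_db import session_factory
+ from src.db.schemas.models import Chat
+ 
+ def list_chats_for_user(user_id: int):
+     """Handlers call helpers like this instead of touching the engine directly."""
+     session = session_factory()
+     try:
+         return session.query(Chat).filter(Chat.user_id == user_id).all()
+     finally:
+         session.close()
+ ```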
233
+
234
+ ### 3. **Strategy Pattern**
235
+ - Multiple AI models supported through unified interface
236
+ - Agent selection based on user preferences
237
+ - Dynamic template loading
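+ 
+ A minimal sketch of the model-selection strategy (the registry and function names are illustrative, not the actual implementation):
+ 
+ ```python
+ import dspy
+ 
+ # Hypothetical registry: provider name -> LM factory
+ PROVIDER_STRATEGIES = {
+     "openai": lambda name: dspy.LM(f"openai/{name}"),
+     "anthropic": lambda name: dspy.LM(f"anthropic/{name}"),
+ }
+ 
+ def select_model(provider: str, model_name: str) -> dspy.LM:
+     try:
+         return PROVIDER_STRATEGIES[provider](model_name)
+     except KeyError:
+         raise ValueError(f"Unsupported provider: {provider}")
+ ```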
238
+
239
+ ### 4. **Observer Pattern**
240
+ - Usage tracking and analytics
241
+ - Event-driven model updates
242
+ - Real-time progress notifications
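+ 
+ A toy sketch of the observer idea (illustrative only; the actual tracking lives in `ai_manager.py`):
+ 
+ ```python
+ from typing import Callable, Dict, List
+ 
+ _subscribers: List[Callable[[Dict], None]] = []
+ 
+ def subscribe(callback: Callable[[Dict], None]) -> None:
+     _subscribers.append(callback)
+ 
+ def notify_usage(event: Dict) -> None:
+     for callback in _subscribers:
+         callback(event)
+ 
+ # e.g. an analytics sink registered at startup
+ subscribe(lambda event: print("tokens used:", event["total_tokens"]))
+ notify_usage({"total_tokens": 512})
+ ```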
243
+
244
+ ## 🔧 Configuration Management
245
+
246
+ ### Environment Configuration
247
+
248
+ ```python
249
+ # Database
250
+ DATABASE_URL: str # Database connection string
251
+ POSTGRES_PASSWORD: str # PostgreSQL password (optional)
252
+
253
+ # AI Models
254
+ ANTHROPIC_API_KEY: str # Claude API key
255
+ OPENAI_API_KEY: str # OpenAI API key
256
+
257
+ # Authentication
258
+ ADMIN_API_KEY: str # Admin operations key (optional)
259
+
260
+ # Deployment
261
+ PORT: int = 8000 # Server port
262
+ DEBUG: bool = False # Debug mode
263
+ ```
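+ 
+ As a rough illustration, these variables can be read at startup with `os.getenv`; the sketch below mirrors the table above but is not the application's actual configuration code:
+ 
+ ```python
+ import os
+ 
+ DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./auto_analyst.db")
+ ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ ADMIN_API_KEY = os.getenv("ADMIN_API_KEY")  # optional
+ PORT = int(os.getenv("PORT", "8000"))
+ DEBUG = os.getenv("DEBUG", "false").lower() == "true"
+ ```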
264
+
265
+ ### Agent Configuration (`agents_config.json`)
266
+
267
+ ```json
268
+ {
269
+ "default_agents": [
270
+ {
271
+ "template_name": "preprocessing_agent",
272
+ "description": "Data cleaning and preparation",
273
+ "variant_type": "both",
274
+ "is_premium": false,
275
+ "usage_count": 0,
276
+ "icon_url": "preprocessing.svg"
277
+ }
278
+ ],
279
+ "premium_templates": [...],
280
+ "remove": [...]
281
+ }
282
+ ```
283
+
284
+ ## 🔒 Security Architecture
285
+
286
+ ### Authentication & Authorization
287
+
288
+ 1. **Session-based Authentication**:
289
+ - Session IDs for user identification
290
+ - Optional API key authentication for admin endpoints
291
+
292
+ 2. **Input Validation**:
293
+ - Pydantic models for request validation
294
+ - SQL injection prevention through SQLAlchemy
295
+ - File upload restrictions and validation
296
+
297
+ 3. **Resource Protection**:
298
+ - User-specific data isolation
299
+ - Usage tracking and monitoring
300
+ - Rate limiting considerations
301
+
302
+ ### Data Security
303
+
304
+ 1. **Database Security**:
305
+ - Encrypted connections for PostgreSQL
306
+ - Parameterized queries
307
+ - Regular backup procedures
308
+
309
+ 2. **Code Execution Security**:
310
+ - Sandboxed code execution environment
311
+ - Limited library imports
312
+ - Timeout protection
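+ 
+ The general shape of such sandboxing is sketched below. This is illustrative only, not the project's actual sandbox (`signal.alarm` is Unix-only):
+ 
+ ```python
+ import signal
+ 
+ # Restricted builtins: no __import__, so arbitrary imports fail
+ ALLOWED_GLOBALS = {"__builtins__": {"print": print, "len": len, "range": range}}
+ 
+ def run_untrusted(code: str, timeout_s: int = 10) -> None:
+     def _on_timeout(signum, frame):
+         raise TimeoutError("code execution timed out")
+     signal.signal(signal.SIGALRM, _on_timeout)
+     signal.alarm(timeout_s)  # hard timeout for runaway code
+     try:
+         exec(code, ALLOWED_GLOBALS, {})
+     finally:
+         signal.alarm(0)
+ ```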
313
+
314
+ ## 📊 Performance Architecture
315
+
316
+ ### Scalability Features
317
+
318
+ 1. **Async Architecture**:
319
+ - Non-blocking I/O operations
320
+ - Concurrent agent execution
321
+ - Streaming responses for long operations
322
+
323
+ 2. **Database Optimization**:
324
+ - Connection pooling
325
+ - Query optimization
326
+ - Indexed frequently accessed columns
327
+
328
+ 3. **Caching Strategy**:
329
+ - In-memory caching for templates
330
+ - Result caching for expensive operations
331
+ - Session data management
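+ 
+ A toy TTL cache for template lookups, sketching the idea rather than the actual caching layer:
+ 
+ ```python
+ import time
+ from typing import Any, Callable, Dict, Tuple
+ 
+ _template_cache: Dict[str, Tuple[Any, float]] = {}
+ 
+ def get_template_cached(name: str, loader: Callable[[str], Any], ttl_s: int = 300) -> Any:
+     entry = _template_cache.get(name)
+     if entry and time.time() - entry[1] < ttl_s:
+         return entry[0]  # fresh cache hit
+     value = loader(name)  # miss or stale entry: reload
+     _template_cache[name] = (value, time.time())
+     return value
+ ```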
332
+
333
+ ### Performance Monitoring
334
+
335
+ 1. **Usage Analytics**:
336
+ - Request/response time tracking
337
+ - Token usage monitoring
338
+ - Error rate analysis
339
+
340
+ 2. **Resource Monitoring**:
341
+ - Database query performance
342
+ - Memory usage tracking
343
+ - Agent execution time analysis
344
+
345
+ ## 🚀 Deployment Architecture
346
+
347
+ ### Development Environment
348
+
349
+ ```
350
+ Local Development → SQLite Database → File-based Logging →
351
+ Direct Model API Calls → Hot Reloading
352
+ ```
353
+
354
+ ### Production Environment
355
+
356
+ ```
357
+ Load Balancer → Multiple FastAPI Instances → PostgreSQL Database →
358
+ Centralized Logging → Monitoring & Alerting
359
+ ```
360
+
361
+ ### Container Architecture
362
+
363
+ ```dockerfile
364
+ # Multi-stage build for optimization
365
+ FROM python:3.11-slim as base
366
+ # Dependencies and application setup
367
+ # Health checks and graceful shutdown
368
+ # Environment-specific configurations
369
+ ```
370
+
371
+ ## 🔄 Integration Patterns
372
+
373
+ ### External Service Integration
374
+
375
+ 1. **AI Model Providers**:
376
+ - Anthropic (Claude)
377
+ - OpenAI (GPT models)
378
+ - Unified interface through DSPy
379
+
380
+ 2. **Database Systems**:
381
+ - PostgreSQL (production)
382
+ - SQLite (development)
383
+ - Migration support through Alembic
384
+
385
+ ### Frontend Integration
386
+
387
+ 1. **REST API**:
388
+ - Standard HTTP endpoints
389
+ - JSON request/response format
390
+ - Session-based communication
391
+
392
+ 2. **Data Exchange**:
393
+ - File upload capabilities
394
+ - Real-time analysis results
395
+ - Report generation and download
396
+
397
+ ### Third-Party Integration
398
+
399
+ 1. **Python Data Science Stack** (used only by agent-generated code):
400
+ - Pandas for data manipulation
401
+ - NumPy for numerical computing
402
+ - Scikit-learn for machine learning
403
+ - Plotly for visualization
404
+ - Statsmodels for statistical analysis
405
+
406
+ 2. **Development Tools**:
407
+ - Alembic for database migrations
408
+ - SQLAlchemy for ORM
409
+ - FastAPI for web framework
410
+ - Pydantic for data validation
411
+
412
+ ## 📝 Documentation Architecture
413
+
414
+ ### API Documentation
415
+
416
+ 1. **Auto-generated Docs**: Available at `/docs` endpoint
417
+ 2. **Schema Definitions**: Pydantic models with descriptions
418
+ 3. **Endpoint Documentation**: Detailed parameter and response docs
419
+
420
+ ### Code Documentation
421
+
422
+ 1. **Inline Documentation**: Comprehensive docstrings
423
+ 2. **Architecture Guides**: High-level system design documentation
424
+ 3. **Getting Started**: Developer onboarding documentation
425
+ 4. **Troubleshooting**: Common issues and solutions
426
+
427
+ This architecture provides a robust, scalable foundation for multi-agent AI analysis while maintaining clean separation of concerns and supporting both development and production deployment scenarios.
docs/development/development_workflow.md ADDED
@@ -0,0 +1,506 @@
1
+ # Auto-Analyst Backend Development Workflow
2
+
3
+ ## 🎯 Development Philosophy
4
+
5
+ The Auto-Analyst backend follows modern Python development practices with emphasis on:
6
+ - **Modularity**: Clear separation of concerns across components
7
+ - **Async-First**: Non-blocking operations for scalability
8
+ - **Type Safety**: Comprehensive type hints and validation
9
+ - **Documentation**: Self-documenting code and comprehensive docs
10
+ - **Testing**: Robust testing at multiple levels
11
+ - **Performance**: Optimized for real-world usage patterns
12
+
13
+ ## 🏗️ Code Organization Principles
14
+
15
+ ### 1. **Directory Structure Standards**
16
+
17
+ ```
18
+ src/
19
+ ├── agents/ # AI agent implementations
20
+ │ ├── agents.py # Core agent definitions
21
+ │ ├── deep_agents.py # Deep analysis system
22
+ │ └── retrievers/ # Information retrieval components
23
+ ├── db/ # Database layer
24
+ │ ├── init_db.py # Database initialization
25
+ │ └── schemas/ # SQLAlchemy models
26
+ ├── managers/ # Business logic layer
27
+ │ ├── chat_manager.py # Chat operations
28
+ │ ├── ai_manager.py # AI model management
29
+ │ └── session_manager.py # Session lifecycle
30
+ ├── routes/ # FastAPI route handlers
31
+ │ ├── session_routes.py # Core functionality
32
+ │ ├── chat_routes.py # Chat endpoints
33
+ │ └── [feature]_routes.py # Feature-specific routes
34
+ ├── utils/ # Shared utilities
35
+ │ ├── logger.py # Centralized logging
36
+ │ └── helpers.py # Common functions
37
+ └── schemas/ # Pydantic models
38
+ ├── chat_schemas.py # Chat data models
39
+ └── [feature]_schemas.py # Feature schemas
40
+ ```
41
+
42
+ ### 2. **Import Organization**
43
+
44
+ ```python
45
+ # Standard library imports
46
+ import asyncio
47
+ import json
48
+ from datetime import datetime
49
+ from typing import List, Optional, Dict, Any
50
+
51
+ # Third-party imports
52
+ import dspy
53
+ import pandas as pd
54
+ from fastapi import APIRouter, Depends, HTTPException
55
+ from pydantic import BaseModel
56
+ from sqlalchemy.orm import Session
57
+
58
+ # Local imports
59
+ from src.db.init_db import session_factory
60
+ from src.db.schemas.models import User, Chat
61
+ from src.utils.logger import Logger
62
+ from src.managers.chat_manager import ChatManager
63
+ ```
64
+
65
+ ## 🛠️ Development Patterns
66
+
67
+ ### 1. **Agent Development Pattern**
68
+
69
+ ```python
70
+ # 1. Define DSPy Signature
71
+ class new_analysis_agent(dspy.Signature):
72
+ """
73
+ Comprehensive docstring explaining:
74
+ - Agent purpose and capabilities
75
+ - Input requirements and formats
76
+ - Expected output format
77
+ - Usage examples
78
+ """
79
+ goal = dspy.InputField(desc="Clear description of analysis objective")
80
+ dataset = dspy.InputField(desc="Dataset structure and content description")
81
+ plan_instructions = dspy.InputField(desc="Execution plan from planner")
82
+
83
+ summary = dspy.OutputField(desc="Natural language summary of analysis")
84
+ code = dspy.OutputField(desc="Executable Python code for analysis")
85
+
86
+ # 2. Add to Agent Configuration
87
+ # In agents_config.json:
88
+ {
89
+ "template_name": "new_analysis_agent",
90
+ "description": "Performs specialized analysis on datasets",
91
+ "variant_type": "both", # individual, planner, or both
92
+ "is_premium": false, # Will be active by default
93
+ "usage_count": 0,
94
+ "icon_url": "analysis.svg"
95
+ }
96
+
97
+ # 3. Register in Agent System
98
+ # In agents.py, add to the appropriate loading functions
99
+ ```
100
+
101
+ ### 2. **Route Development Pattern**
102
+
103
+ ```python
104
+ # 1. Create route file: src/routes/feature_routes.py
105
+ from fastapi import APIRouter, Depends, HTTPException, Query
106
+ from pydantic import BaseModel
107
+ from typing import List, Optional
108
+ from src.db.init_db import session_factory
109
+ from src.db.schemas.models import FeatureModel
110
+ from src.utils.logger import Logger
111
+
112
+ logger = Logger("feature_routes", see_time=True, console_log=False)
113
+ router = APIRouter(prefix="/feature", tags=["feature"])
114
+
115
+ # 2. Define Pydantic schemas
116
+ class FeatureCreate(BaseModel):
117
+ name: str
118
+ description: Optional[str] = None
119
+
120
+ class FeatureResponse(BaseModel):
121
+ id: int
122
+ name: str
123
+ description: Optional[str]
124
+ created_at: datetime
125
+
126
+ # 3. Implement endpoints with proper error handling
127
+ @router.post("/", response_model=FeatureResponse)
128
+ async def create_feature(feature: FeatureCreate):
129
+ try:
130
+ session = session_factory()
131
+ try:
132
+ new_feature = FeatureModel(
133
+ name=feature.name,
134
+ description=feature.description
135
+ )
136
+ session.add(new_feature)
137
+ session.commit()
138
+ session.refresh(new_feature)
139
+
140
+ return FeatureResponse(
141
+ id=new_feature.id,
142
+ name=new_feature.name,
143
+ description=new_feature.description,
144
+ created_at=new_feature.created_at
145
+ )
146
+
147
+ except Exception as e:
148
+ session.rollback()
149
+ logger.log_message(f"Error creating feature: {str(e)}", level=logging.ERROR)
150
+ raise HTTPException(status_code=500, detail=f"Failed to create feature: {str(e)}")
151
+ finally:
152
+ session.close()
153
+
154
+ except Exception as e:
155
+ logger.log_message(f"Error in create_feature: {str(e)}", level=logging.ERROR)
156
+ raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
157
+
158
+ # 4. Register in app.py
159
+ from src.routes.feature_routes import router as feature_router
160
+ app.include_router(feature_router)
161
+ ```
162
+
163
+ ### 3. **Database Model Pattern**
164
+
165
+ ```python
166
+ # In src/db/schemas/models.py
167
+ from sqlalchemy import Column, Integer, String, DateTime, Boolean, Text, ForeignKey
168
+ from sqlalchemy.orm import relationship
169
+ from sqlalchemy.ext.declarative import declarative_base
170
+ from datetime import datetime, timezone
171
+
172
+ Base = declarative_base()
173
+
174
+ class NewModel(Base):
175
+ __tablename__ = "new_models"
176
+
177
+ # Primary key
178
+ id = Column(Integer, primary_key=True, autoincrement=True)
179
+
180
+ # Required fields
181
+ name = Column(String(255), nullable=False, unique=True)
182
+
183
+ # Optional fields
184
+ description = Column(Text, nullable=True)
185
+ is_active = Column(Boolean, default=True, nullable=False)
186
+
187
+ # Timestamps
188
+ created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), nullable=False)
189
+ updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc), nullable=False)
190
+
191
+ # Foreign keys
192
+ user_id = Column(Integer, ForeignKey("users.user_id"), nullable=True)
193
+
194
+ # Relationships
195
+ user = relationship("User", back_populates="new_models")
196
+
197
+ def __repr__(self):
198
+ return f"<NewModel(id={self.id}, name='{self.name}')>"
199
+
200
+ # Update User model to include back reference
201
+ class User(Base):
202
+ # ... existing fields ...
203
+ new_models = relationship("NewModel", back_populates="user")
204
+ ```
205
+
206
+ ### 4. **Manager Pattern**
207
+
208
+ ```python
209
+ # In src/managers/feature_manager.py
210
+ from typing import List, Optional, Dict, Any
211
+ from sqlalchemy.orm import Session
212
+ from src.db.schemas.models import FeatureModel
213
+ from src.utils.logger import Logger
214
+
215
+ logger = Logger("feature_manager", see_time=True, console_log=False)
216
+
217
+ class FeatureManager:
218
+ """
219
+ Manages business logic for feature operations.
220
+ Separates complex business logic from route handlers.
221
+ """
222
+
223
+ def __init__(self, session: Session):
224
+ self.session = session
225
+
226
+ async def create_feature(self, name: str, description: Optional[str] = None) -> FeatureModel:
227
+ """Create a new feature with validation and business logic."""
228
+ try:
229
+ # Validation
230
+ if not name or len(name.strip()) == 0:
231
+ raise ValueError("Feature name cannot be empty")
232
+
233
+ # Check for duplicates
234
+ existing = self.session.query(FeatureModel).filter_by(name=name).first()
235
+ if existing:
236
+ raise ValueError(f"Feature with name '{name}' already exists")
237
+
238
+ # Create feature
239
+ feature = FeatureModel(name=name, description=description)
240
+ self.session.add(feature)
241
+ self.session.commit()
242
+ self.session.refresh(feature)
243
+
244
+ logger.log_message(f"Created feature: {name}", level=logging.INFO)
245
+ return feature
246
+
247
+ except Exception as e:
248
+ self.session.rollback()
249
+ logger.log_message(f"Error creating feature: {str(e)}", level=logging.ERROR)
250
+ raise
251
+
252
+ async def get_features(self, active_only: bool = True) -> List[FeatureModel]:
253
+ """Retrieve features with optional filtering."""
254
+ try:
255
+ query = self.session.query(FeatureModel)
256
+ if active_only:
257
+ query = query.filter(FeatureModel.is_active == True)
258
+
259
+ features = query.order_by(FeatureModel.created_at.desc()).all()
260
+ return features
261
+
262
+ except Exception as e:
263
+ logger.log_message(f"Error retrieving features: {str(e)}", level=logging.ERROR)
264
+ raise
265
+ ```
266
+
267
+ ## 📋 Code Quality Standards
268
+
269
+ ### 1. **Type Hints and Documentation**
270
+
271
+ ```python
272
+ from typing import List, Optional, Dict, Any, Union
273
+ from datetime import datetime
274
+
275
+ async def process_analysis_data(
276
+ data: pd.DataFrame,
277
+ analysis_type: str,
278
+ user_id: Optional[int] = None,
279
+ options: Dict[str, Any] = None
280
+ ) -> Dict[str, Union[str, List[Any], bool]]:
281
+ """
282
+ Process analysis data with specified parameters.
283
+
284
+ Args:
285
+ data: Input DataFrame containing the data to analyze
286
+ analysis_type: Type of analysis to perform ("statistical", "ml", "viz")
287
+ user_id: Optional user ID for tracking and personalization
288
+ options: Additional options for analysis configuration
289
+
290
+ Returns:
291
+ Dictionary containing:
292
+ - status: "success" or "error"
293
+ - result: Analysis results or error message
294
+ - metadata: Additional information about the analysis
295
+
296
+ Raises:
297
+ ValueError: If analysis_type is not supported
298
+ DataError: If data format is invalid
299
+
300
+ Example:
301
+ >>> data = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
302
+ >>> result = await process_analysis_data(data, "statistical")
303
+ >>> print(result["status"])
304
+ "success"
305
+ """
306
+ if options is None:
307
+ options = {}
308
+
309
+ # Implementation...
310
+ return {"status": "success", "result": [], "metadata": {}}
311
+ ```
312
+
313
+ ### 2. **Error Handling Patterns**
314
+
315
+ ```python
316
+ # Comprehensive error handling with logging and user-friendly messages
317
+ async def safe_operation(data: Any) -> Dict[str, Any]:
318
+ """
319
+ Template for safe operations with comprehensive error handling.
320
+ """
321
+ try:
322
+ # Validation
323
+ if not data:
324
+ raise ValueError("Data cannot be empty")
325
+
326
+ # Main operation
327
+ result = await perform_operation(data)
328
+
329
+ # Success logging
330
+ logger.log_message("Operation completed successfully", level=logging.INFO)
331
+ return {"success": True, "data": result}
332
+
333
+ except ValueError as e:
334
+ # Input validation errors
335
+ logger.log_message(f"Validation error: {str(e)}", level=logging.WARNING)
336
+ return {"success": False, "error": "Invalid input", "details": str(e)}
337
+
338
+ except ConnectionError as e:
339
+ # External service errors
340
+ logger.log_message(f"Connection error: {str(e)}", level=logging.ERROR)
341
+ return {"success": False, "error": "Service unavailable", "details": "Please try again later"}
342
+
343
+ except Exception as e:
344
+ # Unexpected errors
345
+ logger.log_message(f"Unexpected error in safe_operation: {str(e)}", level=logging.ERROR)
346
+ return {"success": False, "error": "Internal error", "details": "Please contact support"}
347
+ ```
348
+
349
+ ### 3. **Async/Await Best Practices**
350
+
351
+ ```python
352
+ import asyncio
353
+ from typing import List, Coroutine
354
+
355
+ # Proper async function definition
356
+ async def async_agent_execution(agents: List[str], query: str) -> List[Dict[str, Any]]:
357
+ """Execute multiple agents concurrently."""
358
+
359
+ # Create coroutines
360
+ tasks = [
361
+ execute_single_agent(agent, query)
362
+ for agent in agents
363
+ ]
364
+
365
+ # Execute concurrently with error handling
366
+ results = []
367
+ for task in asyncio.as_completed(tasks):
368
+ try:
369
+ result = await task
370
+ results.append(result)
371
+ except Exception as e:
372
+ logger.log_message(f"Agent execution failed: {e}", level=logging.ERROR)
373
+ results.append({"error": str(e)})
374
+
375
+ return results
376
+
377
+ # Database operations with proper session management
378
+ async def async_database_operation(session: Session) -> Any:
379
+ """Template for async database operations."""
380
+ try:
381
+ # Use asyncio.to_thread for CPU-bound database operations
382
+ result = await asyncio.to_thread(
383
+ lambda: session.query(Model).filter(...).all()
384
+ )
385
+ return result
386
+ except Exception as e:
387
+ session.rollback()
388
+ raise
389
+ finally:
390
+ session.close()
391
+ ```
392
+
393
+ ## 🔧 Development Workflow
394
+
395
+ ### 1. **Feature Development Process**
396
+
397
+ 1. **Plan the Feature**:
398
+ ```bash
399
+ # Create feature branch
400
+ git checkout -b feature/new-analysis-agent
401
+
402
+ # Document requirements
403
+ echo "## New Analysis Agent" >> docs/feature_plan.md
404
+ ```
405
+
406
+ 2. **Implement Core Logic**:
407
+ ```bash
408
+ # Create agent signature
409
+ # Add to agents_config.json
410
+ # Implement business logic in managers/
411
+ # Create route handlers
412
+ ```
413
+
414
+ 3. **Add Database Changes**:
415
+ ```bash
416
+ # Modify models if needed
417
+ alembic revision --autogenerate -m "Add new analysis tables"
418
+ alembic upgrade head
419
+ ```
420
+
421
+ ### 3. **Release Process**
422
+
423
+ 1. **Pre-release Testing**:
424
+ ```bash
425
+ # Run full test suite
426
+ pytest tests/
427
+
428
+ # Test database migrations
429
+ alembic upgrade head
430
+
431
+ # Test with sample data
432
+ python scripts/test_with_sample_data.py
433
+ ```
434
+
435
+ 2. **Documentation Updates**:
436
+ ```bash
437
+ # Update API documentation
438
+ # Update troubleshooting guide
439
+ # Update changelog
440
+ ```
441
+
442
+ 3. **Deployment Preparation**:
443
+ ```bash
444
+ # Update requirements.txt
445
+ pip freeze > requirements.txt
446
+
447
+ # Test container build
448
+ docker build -t auto-analyst-backend .
449
+
450
+ ```
451
+
452
+ ## 📊 Performance Considerations
453
+
454
+ ### 1. **Database Optimization**
455
+
456
+ ```python
457
+ # Use query optimization
458
+ from sqlalchemy.orm import joinedload
459
+
460
+ # Bad: N+1 query problem
461
+ users = session.query(User).all()
462
+ for user in users:
463
+ print(user.chats) # Separate query for each user
464
+
465
+ # Good: Eager loading
466
+ users = session.query(User).options(joinedload(User.chats)).all()
467
+ for user in users:
468
+ print(user.chats) # No additional queries
469
+
470
+ # Use pagination for large datasets
471
+ def get_paginated_results(session, model, page=1, per_page=20):
472
+ offset = (page - 1) * per_page
473
+ return session.query(model).offset(offset).limit(per_page).all()
474
+ ```
475
+
476
+
477
+ ### 2. **Async Optimization**
478
+
479
+ ```python
480
+ # Use connection pooling
481
+ from sqlalchemy.pool import QueuePool
482
+
483
+ engine = create_engine(
484
+ DATABASE_URL,
485
+ poolclass=QueuePool,
486
+ pool_size=20,
487
+ max_overflow=30
488
+ )
489
+
490
+ # Batch operations
491
+ async def batch_process_agents(agents: List[str], queries: List[str]):
492
+ semaphore = asyncio.Semaphore(5) # Limit concurrent operations
493
+
494
+ async def process_with_limit(agent, query):
495
+ async with semaphore:
496
+ return await process_agent(agent, query)
497
+
498
+ tasks = [
499
+ process_with_limit(agent, query)
500
+ for agent, query in zip(agents, queries)
501
+ ]
502
+
503
+ return await asyncio.gather(*tasks, return_exceptions=True)
504
+ ```
505
+
506
+ This development workflow guide provides a comprehensive framework for maintaining code quality, consistency, and performance in the Auto-Analyst backend system. Following these patterns ensures that new features integrate seamlessly with the existing architecture while maintaining the high standards of the codebase.
docs/getting_started.md ADDED
@@ -0,0 +1,273 @@
1
+ # Auto-Analyst Backend - Getting Started Guide
2
+
3
+ ## 🎯 Overview
4
+
5
+ This guide will help you set up and understand the Auto-Analyst backend system. Auto-Analyst is a multi-agent AI platform that orchestrates specialized agents for comprehensive data analysis.
6
+
7
+ ## 🏗️ Core Concepts
8
+
9
+ ### 1. **Multi-Agent System**
10
+ The platform uses specialized AI agents:
11
+ - **Preprocessing Agent**: Data cleaning and preparation
12
+ - **Statistical Analytics Agent**: Statistical analysis and insights
13
+ - **Machine Learning Agent**: Scikit-learn based modeling
14
+ - **Data Visualization Agent**: Chart and plot generation
15
+
16
+ ### 2. **Template System**
17
+ - **Individual Agents**: Single-purpose agents for specific tasks
18
+ - **Planner Agents**: Multi-agent coordination for complex workflows
19
+ - **User Templates**: Customizable agent preferences
20
+ - **Default vs Premium**: Core agents available to all users
21
+
22
+ ### 3. **Session Management**
23
+ - Session-based user tracking
24
+ - Shared DataFrame context between agents
25
+ - Conversation history and code execution tracking
26
+
27
+ ### 4. **Deep Analysis System**
28
+ - Multi-step analysis workflow (questions → planning → execution → synthesis)
29
+ - Streaming progress updates
30
+ - HTML report generation
31
+
32
+ ## 🚀 Quick Start
33
+
34
+ ### 1. Installation
35
+
36
+ ```bash
37
+ # Clone and navigate to backend
38
+ cd Auto-Analyst-CS/auto-analyst-backend
39
+
40
+ # Create virtual environment
41
+ python -m venv venv
42
+ source venv/bin/activate # Linux/Mac
43
+ # or
44
+ venv\Scripts\activate # Windows
45
+
46
+ # Install dependencies
47
+ pip install -r requirements.txt
48
+ ```
49
+
50
+ ### 2. Environment Variables
51
+
52
+ Create `.env` file with:
53
+
54
+ ```env
55
+ # Database
56
+ DATABASE_URL=sqlite:///./auto_analyst.db # For development
57
+ # DATABASE_URL=postgresql://user:pass@host:port/db # For production
58
+
59
+ # AI Models
60
+ ANTHROPIC_API_KEY=your_anthropic_key_here
61
+ OPENAI_API_KEY=your_openai_key_here
62
+
63
+ # Authentication (optional)
64
+ ADMIN_API_KEY=your_admin_key_here
65
+ ```
66
+
67
+ ### 3. Database Initialization
68
+
69
+ ```bash
70
+ # Initialize database and default agents
71
+ python -c "
72
+ from src.db.init_db import init_db
73
+ init_db()
74
+ print('✅ Database initialized successfully')
75
+ "
76
+ ```
77
+
78
+ ### 4. Start the Server
79
+
80
+ ```bash
81
+ # Development server
82
+ python app.py
83
+
84
+ # Or with uvicorn
85
+ uvicorn app:app --reload --host 0.0.0.0 --port 8000
86
+ ```
87
+
88
+ ### 5. Verify Setup
89
+
90
+ Visit: `http://localhost:8000/docs` for interactive API documentation
91
+
92
+ ## 📚 Key Files to Understand
93
+
94
+ ### Core Application Files
95
+
96
+ 1. **`app.py`** - Main FastAPI application and core endpoints
97
+ 2. **`src/agents/agents.py`** - Agent definitions and orchestration
98
+ 3. **`src/agents/deep_agents.py`** - Deep analysis system
99
+ 4. **`src/db/schemas/models.py`** - Database models
100
+ 5. **`src/managers/chat_manager.py`** - Chat and session management
101
+
102
+ ### Route Files (API Endpoints)
103
+
104
+ - **`src/routes/session_routes.py`** - File uploads, sessions, authentication
105
+ - **`src/routes/chat_routes.py`** - Chat and messaging
106
+ - **`src/routes/code_routes.py`** - Code execution and processing
107
+ - **`src/routes/templates_routes.py`** - Agent template management
108
+ - **`src/routes/deep_analysis_routes.py`** - Deep analysis reports
109
+ - **`src/routes/analytics_routes.py`** - Usage analytics and monitoring
110
+
111
+ ### Configuration Files
112
+
113
+ - **`agents_config.json`** - Agent and template definitions
114
+ - **`requirements.txt`** - Python dependencies
115
+ - **`alembic.ini`** - Database migration configuration
116
+
117
+ ## 🔧 Development Workflow
118
+
119
+ ### 1. Adding New Agents
120
+
121
+ ```python
122
+ # 1. Define agent signature in src/agents/agents.py
123
+ class new_agent(dspy.Signature):
124
+ """Agent description"""
125
+ goal = dspy.InputField(desc="Analysis goal")
126
+ dataset = dspy.InputField(desc="Dataset info")
127
+ result = dspy.OutputField(desc="Analysis result")
128
+
129
+ # 2. Add to agents_config.json
130
+ {
131
+ "template_name": "new_agent",
132
+ "description": "Agent description",
133
+ "variant_type": "both",
134
+ "is_premium": false,
135
+ "usage_count": 0
136
+ }
137
+
138
+ # 3. Register in agent loading system
139
+ ```
140
+
141
+ ### 2. Adding New Endpoints
142
+
143
+ ```python
144
+ # 1. Create route in src/routes/feature_routes.py
145
+ from fastapi import APIRouter
146
+ router = APIRouter(prefix="/feature", tags=["feature"])
147
+
148
+ @router.get("/endpoint")
149
+ async def new_endpoint():
150
+ return {"message": "Hello"}
151
+
152
+ # 2. Register in app.py
153
+ from src.routes.feature_routes import router as feature_router
154
+ app.include_router(feature_router)
155
+ ```
156
+
157
+ ### 3. Database Changes
158
+
159
+ ```bash
160
+ # 1. Modify models in src/db/schemas/models.py
161
+ # 2. Create migration
162
+ alembic revision --autogenerate -m "description"
163
+ # 3. Apply migration
164
+ alembic upgrade head
165
+ ```
166
+
167
+ ## 🧪 Testing Your Changes
168
+
169
+ ### 1. Test API Endpoints
170
+
171
+ ```bash
172
+ # Use the interactive docs
173
+ open http://localhost:8000/docs
174
+
175
+ # Or use curl
176
+ curl -X GET "http://localhost:8000/health"
177
+ ```
178
+
179
+ ### 2. Test Agent System
180
+
181
+ ```python
182
+ # Test individual agent
183
+ python -c "
184
+ from src.agents.agents import preprocessing_agent
185
+ import dspy
186
+ dspy.LM('anthropic/claude-sonnet-4-20250514')
187
+ agent = dspy.ChainOfThought(preprocessing_agent)
188
+ result = agent(goal='clean data', dataset='test data')
189
+ print(result)
190
+ "
191
+ ```
192
+
193
+ ### 3. Test Database Operations
194
+
195
+ ```python
196
+ # Test database
197
+ python -c "
198
+ from src.db.init_db import session_factory
199
+ from src.db.schemas.models import AgentTemplate
200
+ session = session_factory()
201
+ templates = session.query(AgentTemplate).all()
202
+ print(f'Found {len(templates)} templates')
203
+ session.close()
204
+ "
205
+ ```
206
+
207
+ ## 🔍 Common Development Tasks
208
+
209
+ ### Adding a New Feature
210
+
211
+ 1. **Plan the Feature**: Define requirements and API design
212
+ 2. **Database Changes**: Add new models if needed
213
+ 3. **Create Routes**: Add API endpoints in `src/routes/`
214
+ 4. **Business Logic**: Add managers in `src/managers/` if complex
215
+ 5. **Documentation**: Update relevant `.md` files
216
+ 6. **Testing**: Test endpoints and integration
217
+
218
+ ### Debugging Issues
219
+
220
+ 1. **Check Logs**: Application logs show detailed error information
221
+ 2. **Database State**: Verify data with database queries
222
+ 3. **API Testing**: Use `/docs` interface for endpoint testing
223
+ 4. **Agent Behavior**: Test individual agents separately
224
+
225
+ ### Performance Optimization
226
+
227
+ 1. **Database Queries**: Use SQLAlchemy query optimization
228
+ 2. **Agent Execution**: Implement async patterns for agent orchestration
229
+ 3. **Resource Management**: Monitor memory usage for large datasets
230
+
231
+ ## 📊 System Architecture Overview
232
+
233
+ ```mermaid
234
+ graph TD
235
+ A[Frontend Request] --> B[FastAPI Router]
236
+ B --> C[Route Handler]
237
+ C --> D[Manager Layer]
238
+ D --> E[Database Layer]
239
+ D --> F[Agent System]
240
+ F --> G[AI Models]
241
+ G --> H[Code Generation]
242
+ H --> I[Execution Environment]
243
+ I --> J[Results Processing]
244
+ J --> K[Response]
245
+
246
+ subgraph "Agent Orchestration"
247
+ F1[Individual Agents]
248
+ F2[Planner Module]
249
+ F3[Deep Analysis]
250
+ F1 --> F2
251
+ F2 --> F3
252
+ end
253
+
254
+ F --> F1
255
+ ```
256
+
257
+ ## 📈 Template Integration
258
+
259
+ The system uses **active user templates** for agent selection:
260
+
261
+ ### Default Agents (Always Available)
262
+ - `preprocessing_agent` (individual & planner variants)
263
+ - `statistical_analytics_agent` (individual & planner variants)
264
+ - `sk_learn_agent` (individual & planner variants)
265
+ - `data_viz_agent` (individual & planner variants)
266
+
267
+ ### Template Loading Logic
268
+ 1. **Individual Agent Execution** (`@agent_name`): Loads ALL available templates
269
+ 2. **Planner Execution**: Loads user's enabled templates (max 10 for performance)
270
+ 3. **Deep Analysis**: Uses user's active template preferences
271
+ 4. **Fallback**: Uses 4 core agents if no user preferences found
272
+
273
+ This architecture ensures users can leverage their preferred agents while maintaining system performance and reliability.
docs/system/database-schema.md ADDED
@@ -0,0 +1,289 @@
1
+ # Auto-Analyst Database Schema Documentation
2
+
3
+ ## 📋 Overview
4
+
5
+ The Auto-Analyst backend uses a relational database schema designed for scalability and data integrity. The schema supports both **SQLite** (development) and **PostgreSQL** (production) databases through SQLAlchemy ORM.
6
+
7
+ ### **Database Features**
8
+ - **User Management** - Authentication and user data
9
+ - **Chat System** - Conversation sessions and message history
10
+ - **AI Model Tracking** - Usage analytics and cost monitoring
11
+ - **Code Execution** - Code generation and execution tracking
12
+ - **Agent Templates** - Customizable AI agent configurations
13
+ - **Deep Analysis** - Multi-step analysis reports and results
14
+ - **User Feedback** - Rating and feedback system
15
+
16
+ ---
17
+
18
+ ## 🗄️ Database Tables
19
+
20
+ ### **1. Users Table (`users`)**
21
+
22
+ **Purpose**: Core user authentication and profile management
23
+
24
+ | Column | Type | Constraints | Description |
25
+ |--------|------|-------------|-------------|
26
+ | `user_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique user identifier |
27
+ | `username` | `STRING` | UNIQUE, NOT NULL | User's display name |
28
+ | `email` | `STRING` | UNIQUE, NOT NULL | User's email address |
29
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Account creation timestamp |
30
+
31
+ **Relationships:**
32
+ - **One-to-Many**: `chats` (User → Chat sessions)
33
+ - **One-to-Many**: `usage_records` (User → Model usage tracking)
34
+ - **One-to-Many**: `deep_analysis_reports` (User → Analysis reports)
35
+ - **One-to-Many**: `template_preferences` (User → Agent preferences)
36
+
37
+ ---
38
+
39
+ ### **2. Chats Table (`chats`)**
40
+
41
+ **Purpose**: Conversation sessions and chat organization
42
+
43
+ | Column | Type | Constraints | Description |
44
+ |--------|------|-------------|-------------|
45
+ | `chat_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique chat session identifier |
46
+ | `user_id` | `INTEGER` | FOREIGN KEY → `users.user_id`, CASCADE DELETE | Chat owner (nullable for anonymous) |
47
+ | `title` | `STRING` | DEFAULT: 'New Chat' | Human-readable chat title |
48
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Chat creation timestamp |
49
+
50
+ **Relationships:**
51
+ - **Many-to-One**: `user` (Chat → User)
52
+ - **One-to-Many**: `messages` (Chat → Messages)
53
+ - **One-to-Many**: `usage_records` (Chat → Model usage)
54
+
55
+ ---
56
+
57
+ ### **3. Messages Table (`messages`)**
58
+
59
+ **Purpose**: Individual messages within chat conversations
60
+
61
+ | Column | Type | Constraints | Description |
62
+ |--------|------|-------------|-------------|
63
+ | `message_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique message identifier |
64
+ | `chat_id` | `INTEGER` | FOREIGN KEY → `chats.chat_id`, CASCADE DELETE | Parent chat session |
65
+ | `sender` | `STRING` | NOT NULL | Message sender: 'user' or 'ai' |
66
+ | `content` | `TEXT` | NOT NULL | Message content (text/markdown) |
67
+ | `timestamp` | `DATETIME` | DEFAULT: UTC NOW | Message creation time |
68
+
69
+ **Relationships:**
70
+ - **Many-to-One**: `chat` (Message → Chat)
71
+ - **One-to-One**: `feedback` (Message → Feedback)
72
+
73
+ ---
74
+
75
+ ### **4. Model Usage Table (`model_usage`)**
76
+
77
+ **Purpose**: AI model usage tracking for analytics and billing
78
+
79
+ | Column | Type | Constraints | Description |
80
+ |--------|------|-------------|-------------|
81
+ | `usage_id` | `INTEGER` | PRIMARY KEY | Unique usage record identifier |
82
+ | `user_id` | `INTEGER` | FOREIGN KEY → `users.user_id`, SET NULL | User who triggered the usage |
83
+ | `chat_id` | `INTEGER` | FOREIGN KEY → `chats.chat_id`, SET NULL | Associated chat session |
84
+ | `model_name` | `STRING(100)` | NOT NULL | AI model used (e.g., 'gpt-4o-mini') |
85
+ | `provider` | `STRING(50)` | NOT NULL | Model provider ('openai', 'anthropic', etc.) |
86
+ | `prompt_tokens` | `INTEGER` | DEFAULT: 0 | Input tokens consumed |
87
+ | `completion_tokens` | `INTEGER` | DEFAULT: 0 | Output tokens generated |
88
+ | `total_tokens` | `INTEGER` | DEFAULT: 0 | Total tokens (input + output) |
89
+ | `query_size` | `INTEGER` | DEFAULT: 0 | Query size in characters |
90
+ | `response_size` | `INTEGER` | DEFAULT: 0 | Response size in characters |
91
+ | `cost` | `FLOAT` | DEFAULT: 0.0 | Cost in USD for this usage |
92
+ | `timestamp` | `DATETIME` | DEFAULT: UTC NOW | Usage timestamp |
93
+ | `is_streaming` | `BOOLEAN` | DEFAULT: FALSE | Whether response was streamed |
94
+ | `request_time_ms` | `INTEGER` | DEFAULT: 0 | Request processing time (milliseconds) |
95
+
96
+ **Relationships:**
97
+ - **Many-to-One**: `user` (Usage → User)
98
+ - **Many-to-One**: `chat` (Usage → Chat)
99
+
100
+ ---
101
+
102
+ ### **5. Code Executions Table (`code_executions`)**
103
+
104
+ **Purpose**: Track code generation and execution attempts
105
+
106
+ | Column | Type | Constraints | Description |
107
+ |--------|------|-------------|-------------|
108
+ | `execution_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique execution identifier |
109
+ | `message_id` | `INTEGER` | FOREIGN KEY → `messages.message_id`, CASCADE DELETE | Associated message |
110
+ | `chat_id` | `INTEGER` | FOREIGN KEY → `chats.chat_id`, CASCADE DELETE | Parent chat session |
111
+ | `user_id` | `INTEGER` | FOREIGN KEY → `users.user_id`, SET NULL | User who triggered execution |
112
+ | `initial_code` | `TEXT` | NULLABLE | First version of generated code |
113
+ | `latest_code` | `TEXT` | NULLABLE | Most recent code version |
114
+ | `is_successful` | `BOOLEAN` | DEFAULT: FALSE | Whether execution succeeded |
115
+ | `output` | `TEXT` | NULLABLE | Execution output (including errors) |
116
+ | `model_provider` | `STRING(50)` | NULLABLE | AI model provider used |
117
+ | `model_name` | `STRING(100)` | NULLABLE | AI model name used |
118
+ | `failed_agents` | `TEXT` | NULLABLE | JSON list of failed agent names |
119
+ | `error_messages` | `TEXT` | NULLABLE | JSON map of error messages by agent |
120
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Execution creation time |
121
+ | `updated_at` | `DATETIME` | DEFAULT: UTC NOW, ON UPDATE | Last update timestamp |
122
+
123
+ ---
124
+
125
+ ### **6. Message Feedback Table (`message_feedback`)**
126
+
127
+ **Purpose**: User feedback and model settings for messages
128
+
129
+ | Column | Type | Constraints | Description |
130
+ |--------|------|-------------|-------------|
131
+ | `feedback_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique feedback identifier |
132
+ | `message_id` | `INTEGER` | FOREIGN KEY → `messages.message_id`, CASCADE DELETE | Associated message |
133
+ | `rating` | `INTEGER` | NULLABLE | Star rating (1-5 scale) |
134
+ | `model_name` | `STRING(100)` | NULLABLE | Model used for this message |
135
+ | `model_provider` | `STRING(50)` | NULLABLE | Model provider used |
136
+ | `temperature` | `FLOAT` | NULLABLE | Temperature setting used |
137
+ | `max_tokens` | `INTEGER` | NULLABLE | Max tokens setting used |
138
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Feedback creation time |
139
+ | `updated_at` | `DATETIME` | DEFAULT: UTC NOW, ON UPDATE | Last update timestamp |
140
+
141
+ **Relationships:**
142
+ - **One-to-One**: `message` (Feedback ↔ Message)
143
+
144
+ ---
145
+
146
+ ### **7. Deep Analysis Reports Table (`deep_analysis_reports`)**
147
+
148
+ **Purpose**: Store comprehensive multi-agent analysis reports
149
+
150
+ | Column | Type | Constraints | Description |
151
+ |--------|------|-------------|-------------|
152
+ | `report_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique report identifier |
153
+ | `report_uuid` | `STRING(100)` | UNIQUE, NOT NULL | Frontend-generated UUID |
154
+ | `user_id` | `INTEGER` | FOREIGN KEY → `users.user_id`, CASCADE DELETE | Report owner |
155
+ | `goal` | `TEXT` | NOT NULL | Analysis objective/question |
156
+ | `status` | `STRING(20)` | NOT NULL, DEFAULT: 'pending' | Status: 'pending', 'running', 'completed', 'failed' |
157
+ | `start_time` | `DATETIME` | DEFAULT: UTC NOW | Analysis start time |
158
+ | `end_time` | `DATETIME` | NULLABLE | Analysis completion time |
159
+ | `duration_seconds` | `INTEGER` | NULLABLE | Total analysis duration |
160
+ | `deep_questions` | `TEXT` | NULLABLE | Generated analytical questions |
161
+ | `deep_plan` | `TEXT` | NULLABLE | Analysis execution plan |
162
+ | `summaries` | `JSON` | NULLABLE | Array of analysis summaries |
163
+ | `analysis_code` | `TEXT` | NULLABLE | Generated Python code |
164
+ | `plotly_figures` | `JSON` | NULLABLE | Array of Plotly figure data |
165
+ | `synthesis` | `JSON` | NULLABLE | Array of synthesis insights |
166
+ | `final_conclusion` | `TEXT` | NULLABLE | Final analysis conclusion |
167
+ | `html_report` | `TEXT` | NULLABLE | Complete HTML report |
168
+ | `progress_percentage` | `INTEGER` | DEFAULT: 0 | Progress percentage (0-100) |
169
+ | `total_tokens_used` | `INTEGER` | DEFAULT: 0 | Total tokens consumed |
170
+ | `estimated_cost` | `FLOAT` | DEFAULT: 0.0 | Estimated cost in USD |
171
+ | `credits_consumed` | `INTEGER` | DEFAULT: 0 | Credits deducted for analysis |
172
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Report creation time |
173
+ | `updated_at` | `DATETIME` | DEFAULT: UTC NOW, ON UPDATE | Last update timestamp |
174
+
175
+ **Relationships:**
176
+ - **Many-to-One**: `user` (Report → User)
177
+
178
+ ---
179
+
180
+ ### **8. Agent Templates Table (`agent_templates`)**
181
+
182
+ **Purpose**: Store predefined AI agent configurations
183
+
184
+ | Column | Type | Constraints | Description |
185
+ |--------|------|-------------|-------------|
186
+ | `template_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique template identifier |
187
+ | `template_name` | `STRING(100)` | UNIQUE, NOT NULL | Internal template name |
188
+ | `display_name` | `STRING(200)` | NULLABLE | User-friendly display name |
189
+ | `description` | `TEXT` | NOT NULL | Template description |
190
+ | `prompt_template` | `TEXT` | NOT NULL | Agent behavior instructions |
191
+ | `icon_url` | `STRING(500)` | NULLABLE | Template icon URL |
192
+ | `category` | `STRING(50)` | NULLABLE | Template category |
193
+ | `is_premium_only` | `BOOLEAN` | DEFAULT: FALSE | Requires premium subscription |
194
+ | `variant_type` | `STRING(20)` | DEFAULT: 'individual' | 'planner', 'individual', or 'both' |
195
+ | `is_active` | `BOOLEAN` | DEFAULT: TRUE | Template is active/available |
196
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Template creation time |
197
+
198
+ **Relationships:**
199
+ - **One-to-Many**: `user_preferences` (Template → User preferences)
200
+
201
+ ---
202
+
203
+ ### **9. User Template Preferences Table (`user_template_preferences`)**
204
+
205
+ **Purpose**: Track user preferences and usage for agent templates
206
+
207
+ | Column | Type | Constraints | Description |
208
+ |--------|------|-------------|-------------|
209
+ | `preference_id` | `INTEGER` | PRIMARY KEY, AUTO INCREMENT | Unique preference identifier |
210
+ | `user_id` | `INTEGER` | FOREIGN KEY → `users.user_id`, CASCADE DELETE | User who owns preference |
211
+ | `template_id` | `INTEGER` | FOREIGN KEY → `agent_templates.template_id`, CASCADE DELETE | Associated template |
212
+ | `is_enabled` | `BOOLEAN` | DEFAULT: TRUE | Whether user has template enabled |
213
+ | `usage_count` | `INTEGER` | DEFAULT: 0 | Number of times user used template |
214
+ | `last_used_at` | `DATETIME` | NULLABLE | Last time user used template |
215
+ | `created_at` | `DATETIME` | DEFAULT: UTC NOW | Preference creation time |
216
+
217
+ **Relationships:**
218
+ - **Many-to-One**: `user` (Preference → User)
219
+ - **Many-to-One**: `template` (Preference → Template)
220
+
221
+ **Constraints:**
222
+ - **Unique**: `(user_id, template_id)` - One preference per user per template
223
+
224
+ ---
225
+
226
+ ## 🔗 Entity Relationship Diagram
227
+
228
+ ```
229
+ Users (1) ──────────── (Many) Chats
230
+ │ │
231
+ │ ├── (Many) Messages
232
+ │ │ │
233
+ │ │ └── (1) MessageFeedback
234
+ │ │
235
+ │ └── (Many) CodeExecutions
236
+
237
+ ├── (Many) ModelUsage
238
+
239
+ ├── (Many) DeepAnalysisReports
240
+
241
+ └── (Many) UserTemplatePreferences
242
+
243
+ └── (Many) AgentTemplates
244
+ ```
245
+
246
+ ---
247
+
248
+ ## 📊 Database Performance
249
+
250
+ ### **Optimized Indexes**
251
+
252
+ ```sql
253
+ -- High-performance queries
254
+ CREATE INDEX idx_messages_chat_timestamp ON messages(chat_id, timestamp DESC);
255
+ CREATE INDEX idx_model_usage_user_time ON model_usage(user_id, timestamp DESC);
256
+ CREATE INDEX idx_model_usage_model_time ON model_usage(model_name, timestamp DESC);
257
+ CREATE INDEX idx_reports_user_time ON deep_analysis_reports(user_id, created_at DESC);
258
+ ```
259
+
260
+ ### **Cascade Deletion Rules**
261
+
262
+ | Parent → Child | Rule | Description |
263
+ |----------------|------|-------------|
264
+ | `users` → `chats` | CASCADE | Delete all user chats when user deleted |
265
+ | `chats` → `messages` | CASCADE | Delete all chat messages when chat deleted |
266
+ | `messages` → `feedback` | CASCADE | Delete feedback when message deleted |
267
+ | `users` → `model_usage` | SET NULL | Keep usage records for analytics |
268
+
269
+ ---
270
+
271
+ ## 🛡️ Security & Maintenance
272
+
273
+ ### **Data Protection**
274
+ - User data isolated by `user_id`
275
+ - Sensitive fields require encryption in production
276
+ - Automatic cleanup of anonymous data after 90 days
277
+
278
+ ### **Regular Maintenance**
279
+ ```sql
280
+ -- Clean old anonymous chats
281
+ DELETE FROM chats WHERE user_id IS NULL AND created_at < DATE_SUB(NOW(), INTERVAL 90 DAY);
282
+
283
+ -- Update statistics for query optimization
284
+ ANALYZE users, chats, messages, model_usage;
285
+ ```
286
+
287
+ ---
288
+
289
+ This schema supports the full Auto-Analyst application with optimized performance, data integrity, and scalability for both development and production environments.
docs/system/shared_dataframe.md ADDED
@@ -0,0 +1,91 @@
1
+ # Shared Dataframe Between Agents
2
+
3
+ This document explains how to use the shared dataframe functionality that allows one agent to create a processed dataframe (`df_processed`) that other agents can access and use.
4
+
5
+ ## Overview
6
+
7
+ The Auto-Analyst system now supports sharing a processed dataframe between agents. This is useful when:
8
+
9
+ 1. One agent performs data preprocessing, cleaning, or feature engineering
10
+ 2. Subsequent agents need to use this processed data for analysis, visualization, or other tasks
11
+
12
+ The first agent (typically Agent1) creates a dataframe called `df_processed`, and all subsequent agents can access this same dataframe without needing to reprocess the data.
13
+
14
+ ## How It Works
15
+
16
+ 1. Automatic variable sharing is handled through the `SHARED_CONTEXT` global dictionary in `format_response.py`
17
+ 2. When an agent executes Python code that creates a variable named `df_processed`, this variable is automatically stored in the shared context
18
+ 3. Subsequent agent code executions will have access to this `df_processed` variable
19
+
20
+ ## Implementation for Agent Developers
21
+
22
+ ### Agent1 (Data Processor)
23
+
24
+ Agent1 should define a processed dataframe that will be used by subsequent agents:
25
+
26
+ ```python
27
+ import pandas as pd
28
+ import numpy as np
29
+
30
+ # Do some data processing
31
+ df_processed = df.copy() # Start with a copy of the original dataframe
32
+ df_processed = df_processed.dropna() # Remove missing values
33
+ df_processed['new_feature'] = df_processed['column_a'] / df_processed['column_b']
34
+ print("Data processing complete. Created df_processed for other agents to use.")
35
+ ```
36
+
37
+ ### Agent2 (Data Consumer)
38
+
39
+ Agent2 can access the `df_processed` dataframe created by Agent1:
40
+
41
+ ```python
42
+ import matplotlib.pyplot as plt
43
+ import seaborn as sns
44
+
45
+ # Access the shared df_processed dataframe
46
+ print(f"Using shared df_processed with shape: {df_processed.shape}")
47
+
48
+ # Create visualization using the processed data
49
+ plt.figure(figsize=(10, 6))
50
+ sns.scatterplot(data=df_processed, x='column_a', y='new_feature')
51
+ plt.title('Analysis of Processed Data')
52
+ plt.show()
53
+ ```
54
+
55
+ ## Technical Details
56
+
57
+ The shared dataframe functionality is implemented through:
58
+
59
+ 1. A global `SHARED_CONTEXT` dictionary in `format_response.py`
60
+ 2. Modified `execute_code_from_markdown` function that checks for `df_processed` in the execution context
61
+ 3. Updated app.py to process agents in the correct order from the plan_list
62
+
63
+ ## Best Practices
64
+
65
+ 1. Name the shared dataframe consistently as `df_processed`
66
+ 2. Document what processing was done to create the shared dataframe
67
+ 3. Agent1 should print a message confirming that `df_processed` was created
68
+ 4. Agent2 should verify the structure of `df_processed` before using it (e.g., print its shape or columns)
69
+ 5. Keep processing in Agent1, analysis in Agent2 for clean separation of concerns
70
+
71
+ ## Example
72
+
73
+ ```python
74
+ # Agent1 code
75
+ import pandas as pd
76
+
77
+ # Load and process data
78
+ df_processed = df.copy()
79
+ df_processed = df_processed[df_processed['price'] > 0] # Remove invalid prices
80
+ df_processed['price_per_sqft'] = df_processed['price'] / df_processed['sqft']
81
+ print(f"Created df_processed with {len(df_processed)} rows after processing")
82
+
83
+ # Agent2 code
84
+ import plotly.express as px
85
+
86
+ # Use the processed dataframe
87
+ print(f"Using df_processed with {len(df_processed)} rows")
88
+ fig = px.scatter(df_processed, x='sqft', y='price', color='price_per_sqft',
89
+ title='Price vs. Square Footage (Colored by Price per SqFt)')
90
+ fig.show()
91
+ ```
docs/troubleshooting/troubleshooting.md ADDED
@@ -0,0 +1,537 @@
1
+ # Auto-Analyst Backend Troubleshooting Guide
2
+
3
+ ## 🚨 Common Startup Issues
4
+
5
+ ### 1. **Database Connection Problems**
6
+
7
+ #### Problem: Database connection failed
8
+ ```
9
+ ❌ sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) no such table: users
10
+ ```
11
+
12
+ **Solutions:**
13
+ 1. **Initialize Database**:
14
+ ```bash
15
+ python -c "
16
+ from src.db.init_db import init_db
17
+ init_db()
18
+ print('✅ Database initialized')
19
+ "
20
+ ```
21
+
22
+ 2. **Check Database File Permissions**:
23
+ ```bash
24
+ # For SQLite
25
+ ls -la auto_analyst.db
26
+ chmod 666 auto_analyst.db # If needed
27
+ ```
28
+
29
+ 3. **Verify DATABASE_URL**:
30
+ ```bash
31
+ # Check .env file
32
+ cat .env | grep DATABASE_URL
33
+
34
+ # For PostgreSQL (production)
35
+ DATABASE_URL=postgresql://user:password@host:port/database
36
+
37
+ # For SQLite (development)
38
+ DATABASE_URL=sqlite:///./auto_analyst.db
39
+ ```
40
+
41
+ #### Problem: PostgreSQL connection issues
42
+ ```
43
+ ❌ psycopg2.OperationalError: FATAL: database "auto_analyst" does not exist
44
+ ```
45
+
46
+ **Solutions:**
47
+ 1. **Create Database**:
48
+ ```sql
49
+ -- Connect to PostgreSQL
50
+ psql -h localhost -U postgres
51
+ CREATE DATABASE auto_analyst;
52
+ \q
53
+ ```
54
+
55
+ 2. **Update Connection String**:
56
+ ```env
57
+ DATABASE_URL=postgresql://username:password@localhost:5432/auto_analyst
58
+ ```
59
+
60
+ ### 2. **Agent Template Loading Issues**
61
+
62
+ #### Problem: No agents found
63
+ ```
64
+ ❌ RuntimeError: No agents loaded for user. Cannot proceed with analysis.
65
+ ```
66
+
67
+ **Solutions:**
68
+ 1. **Initialize Default Agents**:
69
+ ```python
70
+ python -m scripts.populate_agent_templates
71
+ print('✅ Default agents initialized')
72
+ "
73
+ ```
74
+
75
+ 2. **Check Agent Templates in Database**:
76
+ ```python
77
+ python -c "
78
+ from src.db.init_db import session_factory
79
+ from src.db.schemas.models import AgentTemplate
80
+ session = session_factory()
81
+ templates = session.query(AgentTemplate).all()
82
+ print(f'Found {len(templates)} templates:')
83
+ for t in templates:
84
+ print(f' - {t.template_name}: {t.is_active}')
85
+ session.close()
86
+ "
87
+ ```
88
+
89
+ 3. **Populate Templates from Config**:
90
+ ```bash
91
+ python scripts/populate_agent_templates.py
92
+ ```
93
+
94
+ ### 3. **API Key Issues**
95
+
96
+ #### Problem: Missing API keys
97
+ ```
98
+ ❌ AuthenticationError: Invalid API key provided
99
+ ```
100
+
101
+ **Solutions:**
102
+ 1. **Check Environment Variables**:
103
+ ```bash
104
+ # Verify API keys are set
105
+ echo $ANTHROPIC_API_KEY
106
+ echo $OPENAI_API_KEY
107
+
108
+ # Or check .env file
109
+ cat .env | grep API_KEY
110
+ ```
111
+
112
+ 2. **Add Missing Keys**:
113
+ ```env
114
+ # Add to .env file
115
+ ANTHROPIC_API_KEY=sk-ant-api03-...
116
+ OPENAI_API_KEY=sk-...
117
+ ADMIN_API_KEY=your_admin_key_here
118
+ ```
119
+
120
+ 3. **Test API Key Validity**:
121
+ ```python
122
+ python -c "
123
+ import os
124
+ from anthropic import Anthropic
125
+ client = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
126
+ try:
127
+ # Test call
128
+ response = client.messages.create(
129
+ model='claude-3-5-sonnet-20241022',  # use a model name your account can access
130
+ max_tokens=10,
131
+ messages=[{'role': 'user', 'content': 'Hello'}]
132
+ )
133
+ print('✅ Anthropic API key valid')
134
+ except Exception as e:
135
+ print(f'❌ Anthropic API key invalid: {e}')
136
+ "
137
+ ```
138
+
139
+ ## 🤖 Agent System Issues
140
+
141
+ ### 1. **Agent Not Found Errors**
142
+
143
+ #### Problem: Specific agent not available
144
+ ```
145
+ ❌ KeyError: 'custom_agent' not found in loaded agents
146
+ ```
147
+
148
+ **Solutions:**
149
+ 1. **Check Available Agents**:
150
+ ```python
151
+ python -c "
152
+ from src.agents.agents import load_user_enabled_templates_from_db
153
+ from src.db.init_db import session_factory
154
+ session = session_factory()
155
+ agents = load_user_enabled_templates_from_db('test_user', session)
156
+ print('Available agents:', list(agents.keys()))
157
+ session.close()
158
+ "
159
+ ```
160
+
161
+ 2. **Verify Agent Template Exists**:
162
+ ```python
163
+ python -c "
164
+ from src.db.init_db import session_factory
165
+ from src.db.schemas.models import AgentTemplate
166
+ session = session_factory()
167
+ agent = session.query(AgentTemplate).filter_by(template_name='custom_agent').first()
168
+ if agent:
169
+ print(f'Agent found: {agent.display_name}, Active: {agent.is_active}')
170
+ else:
171
+ print('Agent not found in database')
172
+ session.close()
173
+ "
174
+ ```
175
+
176
+ 3. **Add Missing Agent Template**:
177
+ ```python
178
+ # Add to agents_config.json or use database insertion
179
+ python scripts/populate_agent_templates.py
180
+ ```
181
+
182
+ ### 2. **Deep Analysis Failures**
183
+
184
+ #### Problem: Deep analysis stops unexpectedly
185
+ ```
186
+ ❌ DeepAnalysisError: Agent execution failed at step 3
187
+ ```
188
+
189
+ **Solutions:**
190
+ 1. **Check Agent Configuration**:
191
+ ```python
192
+ # Verify user has required agents enabled
193
+ python -c "
194
+ from src.agents.deep_agents import get_user_enabled_agent_names
195
+ from src.db.init_db import session_factory
196
+ session = session_factory()
197
+ agents = get_user_enabled_agent_names('test_user', session)
198
+ required = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent']
199
+ print('Required agents:', required)
200
+ print('Available agents:', agents)
201
+ print('Missing:', [a for a in required if a not in agents])
202
+ session.close()
203
+ "
204
+ ```
205
+
206
+ 2. **Increase Timeout Settings**:
207
+ ```python
208
+ # In deep_agents.py, increase timeout values
209
+ timeout = 300 # Increase from default
210
+ ```
211
+
212
+ 3. **Check Dataset Size**:
213
+ ```python
214
+ # Reduce dataset size for complex analysis
215
+ df_sample = df.sample(n=1000) # Use sample for testing
216
+ ```
217
+
218
+ ## ⚡ Code Execution Problems
219
+
220
+ ### 1. **Code Execution Timeouts**
221
+
222
+ #### Problem: Code execution takes too long
223
+ ```
224
+ ❌ TimeoutError: Code execution exceeded 120 seconds
225
+ ```
226
+
227
+ **Solutions:**
228
+ 1. **Optimize Generated Code**:
229
+ - Use data sampling for large datasets (see the sketch below)
230
+ - Simplify analysis requirements
231
+ - Prefer vectorized pandas operations over row-wise loops
232
+
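+ A minimal sampling sketch (assumes the data is already loaded as a pandas DataFrame named `df`; the 10,000-row cap is an illustrative choice, not a project constant):
+
+ ```python
+ import pandas as pd
+
+ MAX_ROWS = 10_000  # hypothetical cap; tune to your workload and timeout
+ if len(df) > MAX_ROWS:
+     # Reproducible sample so repeated runs of generated code agree
+     df = df.sample(n=MAX_ROWS, random_state=42)
+ ```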
233
+ 2. **Check Resource Usage**:
234
+ ```python
235
+ import psutil
236
+ print(f"Memory usage: {psutil.virtual_memory().percent}%")
237
+ print(f"CPU usage: {psutil.cpu_percent()}%")
238
+ ```
239
+
240
+ 3. **Increase Timeout Settings**:
241
+ ```python
242
+ # In clean_and_store_code function
243
+ future.result(timeout=600) # Increase timeout to 10 minutes
244
+ ```
245
+
246
+ ### 2. **Import Errors in Generated Code**
+
+ #### Problem: Generated code imports an unavailable module
247
+ ```
248
+ ❌ ModuleNotFoundError: No module named 'some_library'
249
+ ```
250
+
251
+ **Solutions:**
252
+ 1. **Check Available Libraries**:
253
+ ```python
254
+ # Available in execution environment:
255
+ import pandas as pd
256
+ import numpy as np
257
+ import plotly.express as px
258
+ import plotly.graph_objects as go
259
+ import sklearn
260
+ import statsmodels.api as sm
261
+ ```
262
+
263
+ 2. **Add Missing Dependencies**:
264
+ ```bash
265
+ pip install missing_library
266
+ ```
267
+
268
+ 3. **Update Execution Environment**:
269
+ ```python
270
+ # In clean_and_store_code function
271
+ exec_globals.update({
272
+ 'new_library': __import__('new_library')
273
+ })
274
+ ```
275
+
276
+ ## 🗄️ Database Issues
277
+
278
+ #### Problem: Migration Errors
279
+ ```
280
+ ❌ alembic.util.exc.CommandError: Can't locate revision identified by 'xyz'
281
+ ```
282
+
283
+ **Solutions:**
284
+ 1. **Reset Migration History**:
285
+ ```bash
286
+ # Delete migration files (except __init__.py)
287
+ rm migrations/versions/*.py
288
+
289
+ # Create new initial migration
290
+ alembic revision --autogenerate -m "initial migration"
291
+ alembic upgrade head
292
+ ```
293
+
294
+ 2. **Force Migration**:
295
+ ```bash
296
+ # Mark current state as up-to-date
297
+ alembic stamp head
298
+ ```
299
+
300
+ 3. **Recreate Database**:
301
+ ```bash
302
+ # For SQLite (development)
303
+ rm auto_analyst.db
304
+ python -c "from src.db.init_db import init_db; init_db()"
305
+ ```
306
+
307
+ #### Problem: Constraint Violations
308
+ ```
309
+ ❌ IntegrityError: UNIQUE constraint failed
310
+ ```
311
+
312
+ **Solutions:**
313
+ 1. **Check Existing Records**:
314
+ ```python
315
+ from src.db.init_db import session_factory
316
+ from src.db.schemas.models import AgentTemplate
317
+
318
+ session = session_factory()
319
+ templates = session.query(AgentTemplate).all()
320
+ for t in templates:
321
+ print(f"{t.template_name}: {t.template_id}")
322
+ session.close()
323
+ ```
324
+
325
+ 2. **Clean Duplicate Data**:
326
+ ```bash
327
+ python -c "
328
+ from src.db.init_db import session_factory
329
+ from src.db.schemas.models import AgentTemplate
330
+ session = session_factory()
331
+ # Remove duplicates based on template_name
332
+ seen = set()
333
+ for template in session.query(AgentTemplate).all():
334
+ if template.template_name in seen:
335
+ session.delete(template)
336
+ else:
337
+ seen.add(template.template_name)
338
+ session.commit()
339
+ session.close()
340
+ "
341
+ ```
342
+
343
+ ## 🔐 Authentication and Authorization Issues
344
+
345
+ #### Problem: Unauthorized Access
346
+ ```
347
+ ❌ 401 Unauthorized: Invalid session
348
+ ```
349
+
350
+ **Solutions:**
351
+ 1. **Check Session ID**:
352
+ ```python
353
+ import requests
354
+ headers = {"X-Session-ID": "your_session_id"}  # or query parameter: ?session_id=your_session_id
355
+ response = requests.get("http://localhost:8000/chats", headers=headers)  # any session-protected route
356
+ ```
357
+
358
+ 2. **Create Valid Session**:
359
+ ```bash
360
+ curl -X POST "http://localhost:8000/session_info" \
361
+ -H "Content-Type: application/json"
362
+ ```
363
+
364
+ 3. **Verify Admin API Key**:
365
+ ```bash
366
+ curl -X GET "http://localhost:8000/analytics/usage" \
367
+ -H "X-API-Key: your_admin_key"
368
+ ```
369
+
370
+ ## 📈 Performance Issues
371
+
372
+ #### Problem: Slow Response Times
373
+ ```
374
+ ⚠️ Request taking longer than expected
375
+ ```
376
+
377
+ **Solutions:**
378
+ 1. **Enable Database Connection Pooling**:
379
+ ```python
380
+ # In init_db.py
381
+ engine = create_engine(
382
+ DATABASE_URL,
383
+ poolclass=QueuePool,
384
+ pool_size=10,
385
+ max_overflow=20
386
+ )
387
+ ```
388
+
389
+ 2. **Optimize Database Queries**:
390
+ ```python
391
+ # Use eager loading for relationships
392
+ session.query(User).options(joinedload(User.chats)).all()
393
+ ```
394
+
395
+ 3. **Add Response Caching**:
396
+ ```python
397
+ # Use local caching for expensive operations
398
+ @lru_cache(maxsize=100)
399
+ def expensive_operation(data):
400
+ return result
401
+ ```
402
+
403
+ #### Problem: Memory Usage High
404
+ ```
405
+ ⚠️ Memory usage above 80%
406
+ ```
407
+
408
+ **Solutions:**
409
+ 1. **Optimize DataFrame Operations**:
410
+ ```python
411
+ # Use chunking for large datasets
412
+ for chunk in pd.read_csv('file.csv', chunksize=1000):
413
+ process_chunk(chunk)
414
+ ```
415
+
416
+ 2. **Clear Unused Variables**:
417
+ ```python
418
+ # In code execution
419
+ del large_dataframe
420
+ import gc
421
+ gc.collect()
422
+ ```
423
+
424
+ 3. **Monitor Memory Usage**:
425
+ ```python
426
+ import psutil
427
+ import logging
428
+
429
+ memory_percent = psutil.virtual_memory().percent
430
+ if memory_percent > 80:
431
+ logging.warning(f"High memory usage: {memory_percent}%")
432
+ ```
433
+
434
+ ## 🔧 Debugging Tools and Commands
435
+
436
+ ### Health Check Commands
437
+
438
+ ```bash
439
+ # Test basic connectivity
440
+ curl http://localhost:8000/health
441
+
442
+ # Check database status
443
+ python -c "
444
+ from sqlalchemy import text
+ from src.db.init_db import session_factory
445
+ try:
446
+ session = session_factory()
447
+ session.execute(text('SELECT 1'))  # SQLAlchemy 2.x requires text() for raw SQL
448
+ print('✅ Database connection OK')
449
+ session.close()
450
+ except Exception as e:
451
+ print(f'❌ Database error: {e}')
452
+ "
453
+
454
+ # Verify agent templates
455
+ python -c "
456
+ from src.db.init_db import session_factory
457
+ from src.db.schemas.models import AgentTemplate
458
+ session = session_factory()
459
+ count = session.query(AgentTemplate).count()
460
+ print(f'Agent templates in database: {count}')
461
+ session.close()
462
+ "
463
+ ```
464
+
465
+ ### Performance Monitoring
466
+
467
+ ```python
468
+ # Memory and CPU monitoring
469
+ import psutil
470
+ import time
471
+
472
+ def monitor_system():
473
+ while True:
474
+ cpu = psutil.cpu_percent(interval=1)
475
+ memory = psutil.virtual_memory()
476
+ print(f"CPU: {cpu}% | Memory: {memory.percent}% | Available: {memory.available // 1024 // 1024}MB")
477
+ time.sleep(5)
478
+
479
+ # Run monitoring (loops forever; stop with Ctrl+C)
480
+ monitor_system()
481
+ ```
482
+
483
+ ### Database Inspection
484
+
485
+ ```python
486
+ # Inspect database tables
487
+ from src.db.init_db import session_factory
488
+ from src.db.schemas.models import *
489
+
490
+ session = session_factory()
491
+
492
+ # Count records in each table
493
+ tables = [User, Chat, Message, AgentTemplate, UserTemplatePreference, DeepAnalysisReport]
494
+ for table in tables:
495
+ count = session.query(table).count()
496
+ print(f"{table.__name__}: {count} records")
497
+
498
+ session.close()
499
+ ```
500
+
501
+ ### Log Analysis
502
+
503
+ ```bash
504
+ # View recent logs
505
+ tail -f logs/app.log
506
+
507
+ # Search for errors
508
+ grep "ERROR" logs/app.log | tail -20
509
+
510
+ # Search for specific issues
511
+ grep -i "agent" logs/app.log | grep -i "error"
512
+ ```
513
+
514
+ ## 🚀 Performance Optimization Tips
515
+
516
+ ### Database Optimization
517
+
518
+ 1. **Use Indexes**: Ensure frequently queried columns have indexes
519
+ 2. **Query Optimization**: Use `joinedload` for relationships
520
+ 3. **Connection Pooling**: Configure appropriate pool sizes
521
+ 4. **Batch Operations**: Use bulk operations for multiple records (points 2 and 4 are sketched below)
522
+
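+ A minimal sketch of points 2 and 4 (assumes the `User` and `Chat` models from `src/db/schemas/models.py`; the `User.chats` relationship and `user_id` column are assumptions based on the examples above, not verified signatures):
+
+ ```python
+ from sqlalchemy.orm import joinedload
+ from src.db.init_db import session_factory
+ from src.db.schemas.models import User, Chat
+
+ session = session_factory()
+
+ # Point 2: eager-load the relationship in one query instead of N+1 lazy loads
+ users = session.query(User).options(joinedload(User.chats)).all()
+
+ # Point 4: write many rows in one batch instead of one INSERT per object
+ session.bulk_save_objects([Chat(user_id=u.user_id) for u in users[:10]])
+ session.commit()
+ session.close()
+ ```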
523
+ ### Agent Performance
524
+
525
+ 1. **Async Execution**: Use async patterns for concurrent operations
526
+ 2. **Result Caching**: Cache expensive computations (points 1 and 2 are sketched below)
527
+ 3. **Memory Management**: Clean up large objects after use
528
+ 4. **Code Optimization**: Simplify generated code for better performance
529
+
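+ A sketch of points 1 and 2 (pure illustration; `run_agent` is a hypothetical coroutine standing in for a real agent call):
+
+ ```python
+ import asyncio
+ from functools import lru_cache
+
+ @lru_cache(maxsize=128)
+ def summarize_schema(columns: tuple) -> str:
+     # Point 2: cache expensive, repeatable computations (hashable args only)
+     return ", ".join(columns)
+
+ async def run_agent(name: str) -> str:
+     await asyncio.sleep(0.1)  # placeholder for the real agent call
+     return f"{name} done"
+
+ async def main():
+     # Point 1: run independent agents concurrently instead of sequentially
+     results = await asyncio.gather(run_agent("preprocessing_agent"),
+                                    run_agent("data_viz_agent"))
+     print(results, summarize_schema(("price", "sqft")))
+
+ asyncio.run(main())
+ ```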
530
+ ### System Monitoring
531
+
532
+ 1. **Resource Tracking**: Monitor CPU, memory, and disk usage
533
+ 2. **Error Monitoring**: Set up alerting for critical errors (see the sketch below)
534
+ 3. **Performance Metrics**: Track response times and throughput
535
+ 4. **Usage Analytics**: Monitor feature usage and optimization opportunities
536
+
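+ A minimal alerting sketch for point 2 (the handler is a stand-in; swap the `print` for email, Slack, or PagerDuty in production):
+
+ ```python
+ import logging
+
+ class AlertHandler(logging.Handler):
+     """Fires an alert for ERROR-and-above records."""
+     def emit(self, record):
+         if record.levelno >= logging.ERROR:
+             print(f"ALERT: {self.format(record)}")  # replace with a real notifier
+
+ logging.getLogger().addHandler(AlertHandler())
+ logging.error("Deep analysis failed")  # would trigger an alert
+ ```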
537
+ This troubleshooting guide covers the most common issues you'll encounter with the Auto-Analyst backend. For additional help, check the system logs and use the debugging tools provided.
entrypoint_local.sh ADDED
@@ -0,0 +1,175 @@
1
+ #!/bin/bash
2
+
3
+ # Entrypoint script for Auto-Analyst backend
4
+ # This script safely initializes the database and starts the application
5
+ # SAFE for PostgreSQL/RDS - only modifies SQLite databases
6
+
7
+ set -e # Exit on any error
8
+
9
+ echo "🚀 Starting Auto-Analyst Backend..."
10
+
11
+ # Function to run safe database initialization
12
+ init_production_database() {
13
+ echo "🔧 Running SAFE database initialization..."
14
+
15
+ # Run the safe initialization script inside if/else: with 'set -e' active,
16
+ # a bare failing command would abort the script before any status check
17
+
18
+ # Don't fail if database initialization has issues - let app try to start
19
+ if python scripts/init_production_db.py; then
20
+ echo "✅ Database initialization completed successfully"
21
+ else
22
+ echo "⚠️ Database initialization had issues, but continuing..."
23
+ echo "📋 App will start but some features may not work properly"
24
+ fi
25
+ }
26
+
27
+ # Function to verify basic app imports work
28
+ verify_app_imports() {
29
+ echo "🔍 Verifying application imports..."
30
+ python -c "
31
+ try:
32
+ from app import app
33
+ print('✅ Main application imports successful')
34
+ except Exception as e:
35
+ print(f'❌ Application import failed: {e}')
36
+ exit(1)
37
+ " || {
38
+ echo "❌ Critical application import failure - cannot start"
39
+ exit 1
40
+ }
41
+ }
42
+
43
+ # Function to verify database connectivity (non-failing)
44
+ verify_database_connectivity() {
45
+ echo "🔗 Testing database connectivity..."
46
+ python -c "
47
+ try:
48
+ from src.db.init_db import get_session, is_postgres_db
49
+ from src.db.schemas.models import AgentTemplate
50
+
51
+ db_type = 'PostgreSQL/RDS' if is_postgres_db() else 'SQLite'
52
+ print(f'🗄️ Database type: {db_type}')
53
+
54
+ session = get_session()
55
+
56
+ # Try to query templates if table exists
57
+ try:
58
+ template_count = session.query(AgentTemplate).count()
59
+ print(f'✅ Database connected. Found {template_count} templates.')
60
+ except Exception as table_error:
61
+ print(f'⚠️ Database connected but template table issue: {table_error}')
62
+ print('📋 Template functionality may not work')
63
+ finally:
64
+ session.close()
65
+
66
+ except Exception as e:
67
+ print(f'⚠️ Database connectivity issue: {e}')
68
+ print('📋 App will start but database features may not work')
69
+ "
70
+ # Don't exit on database connectivity issues - let app try to start
71
+ }
72
+
73
+ # Function to populate agents and templates for development (SQLite only)
74
+ # Uses agents_config.json if available, falls back to legacy method
75
+ populate_agents_templates() {
76
+ echo "🔧 Checking if agents/templates need to be populated..."
77
+ python -c "
78
+ try:
79
+ from src.db.init_db import DATABASE_URL
80
+ from src.db.schemas.models import AgentTemplate
81
+ from src.db.init_db import session_factory
82
+
83
+ # Check database type
84
+ if DATABASE_URL.startswith('sqlite'):
85
+ print('🔍 SQLite database detected - checking template population')
86
+
87
+ session = session_factory()
88
+ try:
89
+ template_count = session.query(AgentTemplate).count()
90
+
91
+ if template_count == 0:
92
+ print('📋 No templates found - populating agents and templates...')
93
+ session.close()
94
+ exit(1) # Signal that population is needed
95
+ else:
96
+ print(f'✅ Found {template_count} templates - population not needed')
97
+ session.close()
98
+ exit(0) # Signal that population is not needed
99
+ except Exception as e:
100
+ print(f'⚠️ Error checking templates: {e}')
101
+ print('📋 Will attempt to populate anyway')
102
+ session.close()
103
+ exit(1) # Signal that population is needed
104
+ else:
105
+ print('🔍 PostgreSQL/RDS detected - skipping auto-population')
106
+ exit(0) # Signal that population is not needed
107
+
108
+ except Exception as e:
109
+ print(f'❌ Error during template check: {e}')
110
+ exit(0) # Don't fail startup, just skip population
111
+ "
112
+
113
+ # Check if population is needed (exit code 1 means yes)
114
+ if [ $? -eq 1 ]; then
115
+ echo "🚀 Running agent/template population for SQLite..."
116
+
117
+ # Check if agents_config.json exists (try multiple locations)
118
+ if [ -f "agents_config.json" ] || [ -f "/app/agents_config.json" ] || [ -f "../agents_config.json" ]; then
119
+ echo "📖 Found agents_config.json - validating configuration..."
120
+
121
+ # Validate configuration first
122
+ validation_result=0
123
+ python scripts/populate_agent_templates.py validate || validation_result=$?
124
+
125
+ if [ $validation_result -eq 0 ]; then
126
+ echo "✅ Configuration valid - proceeding with sync"
127
+ python scripts/populate_agent_templates.py sync || populate_status=$?
128
+ else
129
+ echo "⚠️ Configuration validation failed - attempting sync anyway"
130
+ python scripts/populate_agent_templates.py sync || populate_status=$?
131
+ fi
132
+ else
133
+ echo "⚠️ agents_config.json not found - trying legacy method"
134
+ python scripts/populate_agent_templates.py || populate_status=$?
135
+ fi
136
+
137
+ if [ "${populate_status:-0}" -eq 0 ]; then
138
+ echo "✅ Agent/template population completed successfully"
139
+ else
140
+ echo "⚠️ Agent/template population had issues, but continuing..."
141
+ echo "📋 You may need to populate templates manually"
142
+ echo "💡 Tip: Ensure agents_config.json exists in the backend directory"
143
+ fi
144
+ fi
145
+ }
146
+
147
+ # Check if we need to find agents_config.json from space root
148
+ if [ ! -f "/app/agents_config.json" ]; then
149
+ echo "⚠️ agents_config.json not found in container - checking build issues"
150
+ echo "📁 Files in /app directory:"
151
+ ls -la /app/ | head -10
152
+ else
153
+ echo "✅ agents_config.json found in container"
154
+ fi
155
+
156
+ # Main startup sequence
157
+ echo "🔧 Initializing production environment..."
158
+
159
+ # Verify critical imports first
160
+ verify_app_imports
161
+
162
+ # Initialize database safely (won't modify RDS)
163
+ init_production_database
164
+
165
+ # Test database connectivity (non-failing)
166
+ verify_database_connectivity
167
+
168
+ # Populate agents and templates for development (SQLite only)
169
+ populate_agents_templates
170
+
171
+ echo "🎯 Starting FastAPI application..."
172
+ echo "🌐 Application will be available on port 7860"
173
+
174
+ # Start the FastAPI application
175
+ exec uvicorn app:app --host 0.0.0.0 --port 7860
images/AI snapshot-chat.png ADDED

Git LFS Details

  • SHA256: d4bacf72e135239daf86d45a93ee6798aa40e2376498a7944d32b3392ac0ab19
  • Pointer size: 131 Bytes
  • Size of remote file: 305 kB
images/Auto-Analyst Banner.png ADDED

Git LFS Details

  • SHA256: 30a322031c1e8eca20f202d2bda534a921c5c4556ee9ae81f0fcabdb156ab2cd
  • Pointer size: 131 Bytes
  • Size of remote file: 184 kB
images/Auto-analyst-poster.png ADDED

Git LFS Details

  • SHA256: 7ba24a0c523fd084d27fd3f13ae3887095556d16bbcc2b8502fee3b9c8907cdc
  • Pointer size: 131 Bytes
  • Size of remote file: 184 kB
images/Auto-analysts icon small.png ADDED

Git LFS Details

  • SHA256: 5e1f25fd62bef47e389023315b1e3994321ea21eec44b70d9630154672a46d8f
  • Pointer size: 130 Bytes
  • Size of remote file: 10.1 kB
images/auto-analyst logo.png ADDED

Git LFS Details

  • SHA256: 7459da6f81ce2674f304693de6a04c7e0526c92fa7ef5c3b19d98d7a989e7fb7
  • Pointer size: 130 Bytes
  • Size of remote file: 28.1 kB
requirements.txt ADDED
@@ -0,0 +1,62 @@
1
+ aiofiles==24.1.0
2
+ beautifulsoup4==4.13.4
3
+ dspy==2.6.27
4
+ litellm==1.75.2
5
+ email_validator==2.2.0
6
+ fastapi==0.115.5
7
+ fastapi-cli==0.0.7
8
+ FastAPI-SQLAlchemy==0.2.1
9
+ fastapi-sso==0.16.0
10
+ groq==0.18.0
11
+ gunicorn==23.0.0
12
+ huggingface-hub==0.30.2
13
+ joblib==1.4.2
14
+ llama-cloud==0.1.19
15
+ llama-cloud-services==0.6.21
16
+ llama-index==0.12.14
17
+ llama-index-agent-openai==0.4.2
18
+ llama-index-cli==0.4.1
19
+ llama-index-core==0.12.34.post1
20
+ llama-index-embeddings-openai==0.3.1
21
+ llama-index-indices-managed-llama-cloud==0.6.4
22
+ llama-index-llms-openai==0.3.14
23
+ llama-index-multi-modal-llms-openai==0.4.2
24
+ llama-index-program-openai==0.3.1
25
+ llama-index-question-gen-openai==0.3.0
26
+ Markdown==3.7
27
+ matplotlib==3.10.0
28
+ matplotlib-inline==0.1.7
29
+ numpy==2.2.2
30
+ openpyxl==3.1.2
31
+ xlrd==2.0.1
32
+ openai==1.97.0
33
+ pandas==2.2.3
34
+ polars==1.31.0
35
+ pillow==11.1.0
36
+ plotly==5.24.1
37
+ psycopg2==2.9.10
38
+ python-dateutil==2.9.0.post0
39
+ python-dotenv==1.0.1
40
+ requests==2.32.3
41
+ scikit-learn==1.6.1
42
+ scipy==1.15.1
43
+ seaborn==0.13.2
44
+ setuptools==75.8.0
45
+ SQLAlchemy==2.0.37
46
+ statsmodels==0.14.4
47
+ tabulate==0.9.0
48
+ threadpoolctl==3.5.0
49
+ tiktoken==0.8.0
50
+ tokenizers==0.21.0
51
+ tqdm==4.67.1
52
+ urllib3==2.4.0
53
+ uvicorn==0.29.0
54
+ websockets>=13.1.0
55
+ wheel==0.45.1
56
+ xgboost-cpu==3.0.2
57
+ bokeh==3.7.3
58
+ pymc==5.23.0
59
+ lightgbm==4.6.0
60
+ arviz==0.21.0
61
+ optuna==4.3.0
62
+ litellm[proxy]==1.75.2  # pinned to match the litellm entry above
scripts/__init__.py ADDED
File without changes
scripts/format_response.py ADDED
@@ -0,0 +1,1112 @@
1
+ import re
2
+ import json
3
+ import sys
4
+ import contextlib
5
+ from io import StringIO
6
+ import time
7
+ import logging
8
+ from src.utils.logger import Logger
9
+ import textwrap
10
+
11
+ logger = Logger(__name__, level="INFO", see_time=False, console_log=False)
12
+
13
+ @contextlib.contextmanager
14
+ def stdoutIO(stdout=None):
15
+ old = sys.stdout
16
+ if stdout is None:
17
+ stdout = StringIO()
18
+ sys.stdout = stdout
19
+ yield stdout
20
+ sys.stdout = old
21
+
22
+ # Precompile regex patterns for better performance
23
+ SENSITIVE_MODULES = re.compile(r"(os|sys|subprocess|dotenv|requests|http|socket|smtplib|ftplib|telnetlib|paramiko)")
24
+ IMPORT_PATTERN = re.compile(r"^\s*import\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)
25
+ FROM_IMPORT_PATTERN = re.compile(r"^\s*from\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)
26
+ DYNAMIC_IMPORT_PATTERN = re.compile(r"__import__\s*\(\s*['\"](" + SENSITIVE_MODULES.pattern + r")['\"].*?\)")
27
+ ENV_ACCESS_PATTERN = re.compile(r"(os\.getenv|os\.environ|load_dotenv|\.__import__\s*\(\s*['\"]os['\"].*?\.environ)")
28
+ FILE_ACCESS_PATTERN = re.compile(r"(open\(|read\(|write\(|file\(|with\s+open)")
29
+
30
+ # Enhanced API key detection patterns
31
+ API_KEY_PATTERNS = [
32
+ # Direct key assignments
33
+ re.compile(r"(?i)(api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)s?\s*=\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
34
+ # Function calls with keys
35
+ re.compile(r"(?i)\.set_api_key\(\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
36
+ # Dictionary assignments
37
+ re.compile(r"(?i)['\"](?:api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)['\"](?:\s*:\s*)[\"\'][\w\-\+\/\=]{8,}[\"\']"),
38
+ # Common key formats (base64-like, hex)
39
+ re.compile(r"[\"\'](?:[A-Za-z0-9\+\/\=]{32,}|[0-9a-fA-F]{32,})[\"\']"),
40
+ # Bearer token pattern
41
+ re.compile(r"[\"\'](Bearer\s+[\w\-\+\/\=]{8,})[\"\']"),
42
+ # Inline URL with auth
43
+ re.compile(r"https?:\/\/[\w\-\+\/\=]{8,}@")
44
+ ]
45
+
46
+ # Network request patterns
47
+ NETWORK_REQUEST_PATTERNS = re.compile(r"(requests\.|urllib\.|http\.|\.post\(|\.get\(|\.connect\()")
48
+
49
+ def check_security_concerns(code_str):
50
+ """Check code for security concerns and return info about what was found"""
51
+ security_concerns = {
52
+ "has_concern": False,
53
+ "messages": [],
54
+ "blocked_imports": False,
55
+ "blocked_dynamic_imports": False,
56
+ "blocked_env_access": False,
57
+ "blocked_file_access": False,
58
+ "blocked_api_keys": False,
59
+ "blocked_network": False
60
+ }
61
+
62
+ # Check for sensitive imports
63
+ if IMPORT_PATTERN.search(code_str) or FROM_IMPORT_PATTERN.search(code_str):
64
+ security_concerns["has_concern"] = True
65
+ security_concerns["blocked_imports"] = True
66
+ security_concerns["messages"].append("Sensitive module imports blocked")
67
+
68
+ # Check for __import__ bypass technique
69
+ if DYNAMIC_IMPORT_PATTERN.search(code_str):
70
+ security_concerns["has_concern"] = True
71
+ security_concerns["blocked_dynamic_imports"] = True
72
+ security_concerns["messages"].append("Dynamic import of sensitive modules blocked")
73
+
74
+ # Check for environment variables access
75
+ if ENV_ACCESS_PATTERN.search(code_str):
76
+ security_concerns["has_concern"] = True
77
+ security_concerns["blocked_env_access"] = True
78
+ security_concerns["messages"].append("Environment variables access blocked")
79
+
80
+ # Check for file operations
81
+ if FILE_ACCESS_PATTERN.search(code_str):
82
+ security_concerns["has_concern"] = True
83
+ security_concerns["blocked_file_access"] = True
84
+ security_concerns["messages"].append("File operations blocked")
85
+
86
+ # Check for API key patterns
87
+ for pattern in API_KEY_PATTERNS:
88
+ if pattern.search(code_str):
89
+ security_concerns["has_concern"] = True
90
+ security_concerns["blocked_api_keys"] = True
91
+ security_concerns["messages"].append("API key/token usage blocked")
92
+ break
93
+
94
+ # Check for network requests
95
+ if NETWORK_REQUEST_PATTERNS.search(code_str):
96
+ security_concerns["has_concern"] = True
97
+ security_concerns["blocked_network"] = True
98
+ security_concerns["messages"].append("Network requests blocked")
99
+
100
+ return security_concerns
101
+
102
+ def clean_code_for_security(code_str, security_concerns):
103
+ """Apply security modifications to the code based on detected concerns"""
104
+ modified_code = code_str
105
+
106
+ # Block sensitive imports if needed
107
+ if security_concerns["blocked_imports"]:
108
+ modified_code = IMPORT_PATTERN.sub(r'# BLOCKED: import \1\n', modified_code)
109
+ modified_code = FROM_IMPORT_PATTERN.sub(r'# BLOCKED: from \1\n', modified_code)
110
+
111
+ # Block dynamic imports if needed
112
+ if security_concerns["blocked_dynamic_imports"]:
113
+ modified_code = DYNAMIC_IMPORT_PATTERN.sub(r'"BLOCKED_DYNAMIC_IMPORT"', modified_code)
114
+
115
+ # Block environment access if needed
116
+ if security_concerns["blocked_env_access"]:
117
+ modified_code = ENV_ACCESS_PATTERN.sub(r'"BLOCKED_ENV_ACCESS"', modified_code)
118
+
119
+ # Block file operations if needed
120
+ if security_concerns["blocked_file_access"]:
121
+ modified_code = FILE_ACCESS_PATTERN.sub(r'"BLOCKED_FILE_ACCESS"', modified_code)
122
+
123
+ # Block API keys if needed
124
+ if security_concerns["blocked_api_keys"]:
125
+ for pattern in API_KEY_PATTERNS:
126
+ modified_code = pattern.sub(r'"BLOCKED_API_KEY"', modified_code)
127
+
128
+ # Block network requests if needed
129
+ if security_concerns["blocked_network"]:
130
+ modified_code = NETWORK_REQUEST_PATTERNS.sub(r'"BLOCKED_NETWORK_REQUEST"', modified_code)
131
+
132
+ # Add warning banner if needed
133
+ if security_concerns["has_concern"]:
134
+ security_message = "⚠️ SECURITY WARNING: " + ". ".join(security_concerns["messages"]) + "."
135
+ modified_code = f"print('{security_message}')\n\n" + modified_code
136
+
137
+ return modified_code
138
+
139
+ def format_correlation_output(text):
140
+ """Format correlation matrix output for better readability"""
141
+ lines = text.split('\n')
142
+ formatted_lines = []
143
+
144
+ for line in lines:
145
+ # Skip empty lines at the beginning
146
+ if not line.strip() and not formatted_lines:
147
+ continue
148
+
149
+ if not line.strip():
150
+ formatted_lines.append(line)
151
+ continue
152
+
153
+ # Check if this line contains correlation values or variable names
154
+ stripped_line = line.strip()
155
+ parts = stripped_line.split()
156
+
157
+ if len(parts) > 1:
158
+ # Check if this is a header line with variable names
159
+ if all(part.replace('_', '').replace('-', '').isalpha() for part in parts):
160
+ # This is a header row with variable names
161
+ formatted_header = f"{'':12}" # Empty first column for row labels
162
+ for part in parts:
163
+ formatted_header += f"{part:>12}"
164
+ formatted_lines.append(formatted_header)
165
+ elif any(char.isdigit() for char in stripped_line) and ('.' in stripped_line or '-' in stripped_line):
166
+ # This looks like a correlation line with numbers
167
+ row_name = parts[0] if parts else ""
168
+ values = parts[1:] if len(parts) > 1 else []
169
+
170
+ formatted_row = f"{row_name:<12}"
171
+ for value in values:
172
+ try:
173
+ val = float(value)
174
+ formatted_row += f"{val:>12.3f}"
175
+ except ValueError:
176
+ formatted_row += f"{value:>12}"
177
+
178
+ formatted_lines.append(formatted_row)
179
+ else:
180
+ # Other lines (like titles)
181
+ formatted_lines.append(line)
182
+ else:
183
+ formatted_lines.append(line)
184
+
185
+ return '\n'.join(formatted_lines)
186
+
187
+ def format_summary_stats(text):
188
+ """Format summary statistics for better readability"""
189
+ lines = text.split('\n')
190
+ formatted_lines = []
191
+
192
+ for line in lines:
193
+ if not line.strip():
194
+ formatted_lines.append(line)
195
+ continue
196
+
197
+ # Check if this is a header line with statistical terms only (missing first column)
198
+ stripped_line = line.strip()
199
+ if any(stat in stripped_line.lower() for stat in ['count', 'mean', 'median', 'std', 'min', 'max', '25%', '50%', '75%']):
200
+ parts = stripped_line.split()
201
+ # Check if this is a header row (starts with statistical terms)
202
+ if parts and parts[0].lower() in ['count', 'mean', 'median', 'std', 'min', 'max', '25%', '50%', '75%']:
203
+ # This is a header row - add proper spacing
204
+ formatted_header = f"{'':12}" # Empty first column for row labels
205
+ for part in parts:
206
+ formatted_header += f"{part:>15}"
207
+ formatted_lines.append(formatted_header)
208
+ else:
209
+ # This is a data row - format normally
210
+ row_name = parts[0] if parts else ""
211
+ values = parts[1:] if len(parts) > 1 else []
212
+
213
+ formatted_row = f"{row_name:<12}"
214
+ for value in values:
215
+ try:
216
+ if '.' in value or 'e' in value.lower():
217
+ val = float(value)
218
+ if abs(val) >= 1000000:
219
+ formatted_row += f"{val:>15.2e}"
220
+ elif abs(val) >= 1:
221
+ formatted_row += f"{val:>15.2f}"
222
+ else:
223
+ formatted_row += f"{val:>15.6f}"
224
+ else:
225
+ val = int(value)
226
+ formatted_row += f"{val:>15}"
227
+ except ValueError:
228
+ formatted_row += f"{value:>15}"
229
+
230
+ formatted_lines.append(formatted_row)
231
+ else:
232
+ # Other lines (titles, etc.) - keep as is
233
+ formatted_lines.append(line)
234
+
235
+ return '\n'.join(formatted_lines)
236
+
237
+ def clean_print_statements(code_block):
238
+ """
239
+ This function cleans up any `print()` statements that might contain unwanted `\n` characters.
240
+ It ensures print statements are properly formatted without unnecessary newlines.
241
+ """
242
+ # This regex targets print statements, even if they have newlines inside
243
+ return re.sub(r'print\((.*?)(\\n.*?)(.*?)\)', r'print(\1\3)', code_block, flags=re.DOTALL)
244
+
245
+ def remove_code_block_from_summary(summary):
246
+ # use regex to remove code block from summary list
247
+ summary = re.sub(r'```python\n(.*?)\n```', '', summary)
248
+ return summary.split("\n")
249
+
250
+ def remove_main_block(code):
251
+ # Match the __main__ block
252
+ pattern = r'(?m)^if\s+__name__\s*==\s*["\']__main__["\']\s*:\s*\n((?:\s+.*\n?)*)'
253
+
254
+ match = re.search(pattern, code)
255
+ if match:
256
+ main_block = match.group(1)
257
+
258
+ # Dedent the code block inside __main__
259
+ dedented_block = textwrap.dedent(main_block)
260
+
261
+ # Remove \n from any print statements in the block (also handling multiline print cases)
262
+ dedented_block = clean_print_statements(dedented_block)
263
+ # Replace the block in the code
264
+ cleaned_code = re.sub(pattern, dedented_block, code)
265
+
266
+ # Optional: Remove leading newlines if any
267
+ cleaned_code = cleaned_code.strip()
268
+
269
+ return cleaned_code
270
+ return code
271
+
272
+
273
+ def format_code_block(code_str):
274
+ code_clean = re.sub(r'^```python\n?', '', code_str, flags=re.MULTILINE)
275
+ code_clean = re.sub(r'\n```$', '', code_clean)
276
+ return f'\n{code_clean}\n'
277
+
278
+ def format_code_backticked_block(code_str):
279
+ # Add None check at the beginning
280
+ if code_str is None:
281
+ return
282
+
283
+ # Add type check to ensure it's a string
284
+ if not isinstance(code_str, str):
285
+ return f"```python\n# Invalid code type: {type(code_str)}\n```"
286
+
287
+ code_clean = re.sub(r'^```python\n?', '', code_str, flags=re.MULTILINE)
288
+ code_clean = re.sub(r'\n```$', '', code_clean)
289
+ # Only match assignments at top level (not indented)
290
+ # 1. Remove 'df = pd.DataFrame()' if it's at the top level
291
+
292
+
293
+ # Remove reading the csv file if it's already in the context
294
+ modified_code = re.sub(r"df\s*=\s*pd\.read_csv\([\"\'].*?[\"\']\).*?(\n|$)", '', code_clean)
295
+ modified_code = re.sub(r'^(\s*)(df\s*=.*)$', r'\1# \2', code_clean, flags=re.MULTILINE)
296
+
297
+ # Only match assignments at top level (not indented)
298
+ # 1. Remove 'df = pd.DataFrame()' if it's at the top level
299
+ modified_code = re.sub(
300
+ r"^df\s*=\s*pd\.DataFrame\(\s*\)\s*(#.*)?$",
301
+ '',
302
+ modified_code,
303
+ flags=re.MULTILINE
304
+ )
305
+
306
+ # # Remove sample dataframe lines with multiple array values
307
+ modified_code = re.sub(r"^# Sample DataFrames?.*?(\n|$)", '', modified_code, flags=re.MULTILINE | re.IGNORECASE)
308
+
309
+ # # Remove plt.show() statements
310
+ modified_code = re.sub(r"plt\.show\(\).*?(\n|$)", '', modified_code)
311
+
312
+
313
+ # remove main
314
+ code_clean = remove_main_block(modified_code)
315
+
316
+ return f'```python\n{code_clean}\n```'
317
+
318
+
319
+ def execute_code_from_markdown(code_str, dataframe=None):
320
+ import pandas as pd
321
+ import plotly.express as px
322
+ import plotly
323
+ import plotly.graph_objects as go
324
+ import matplotlib.pyplot as plt
325
+ import seaborn as sns
326
+ import numpy as np
327
+ import re
328
+ import traceback
329
+ import sys
330
+ from io import StringIO, BytesIO
331
+ import base64
332
+
333
+ # Check for security concerns in the code
334
+ security_concerns = check_security_concerns(code_str)
335
+
336
+ # Apply security modifications to the code
337
+ modified_code = clean_code_for_security(code_str, security_concerns)
338
+
339
+ # Enhanced print function that detects and formats tabular data
340
+ captured_outputs = []
341
+ original_print = print
342
+
343
+ # Set pandas display options for full table display
344
+ pd.set_option('display.max_columns', None)
345
+ pd.set_option('display.max_rows', 20) # Limit to 20 rows instead of unlimited
346
+ pd.set_option('display.width', None)
347
+ pd.set_option('display.max_colwidth', 50)
348
+ pd.set_option('display.expand_frame_repr', False)
349
+
350
+
351
+
352
+ def enhanced_print(*args, **kwargs):
353
+ # Convert all args to strings
354
+ str_args = [str(arg) for arg in args]
355
+ output_text = kwargs.get('sep', ' ').join(str_args)
356
+
357
+ # Special case for DataFrames - use pipe delimiter and clean format
358
+ if len(args) == 1 and isinstance(args[0], pd.DataFrame):  # length check first so print() with no args cannot raise IndexError
359
+ # Format DataFrame with pipe delimiter using to_csv for reliable column separation
360
+ df = args[0]
361
+
362
+ # Use StringIO to capture CSV output with pipe delimiter
363
+ from io import StringIO
364
+ csv_buffer = StringIO()
365
+
366
+ # Export to CSV with pipe delimiter, preserving index
367
+ df.to_csv(csv_buffer, sep='|', index=True, float_format='%.6g')
368
+ csv_output = csv_buffer.getvalue()
369
+
370
+ # Clean up the CSV output - remove quotes and extra formatting
371
+ lines = csv_output.strip().split('\n')
372
+ cleaned_lines = []
373
+
374
+ for line in lines:
375
+ # Remove any quotes that might have been added by to_csv
376
+ clean_line = line.replace('"', '')
377
+ # Split by pipe, strip whitespace from each part, then rejoin
378
+ parts = [part.strip() for part in clean_line.split('|')]
379
+ cleaned_lines.append(' | '.join(parts))
380
+
381
+ output_text = '\n'.join(cleaned_lines)
382
+ captured_outputs.append(f"<TABLE_START>\n{output_text}\n<TABLE_END>")
383
+ original_print(output_text)
384
+ return
385
+
386
+ # Detect if this looks like tabular data (generic approach)
387
+ is_table = False
388
+
389
+ # Check for table patterns:
390
+ # 1. Multiple lines with consistent spacing
391
+ lines = output_text.split('\n')
392
+ if len(lines) > 2:
393
+ # Count lines that look like they have multiple columns (2+ spaces between words)
394
+ multi_column_lines = sum(1 for line in lines if len(line.split()) > 1 and ' ' in line)
395
+ if multi_column_lines >= 2: # At least 2 lines with multiple columns
396
+ is_table = True
397
+
398
+ # Check for pandas DataFrame patterns like index with column names
399
+ if any(re.search(r'^\s*\d+\s+', line) for line in lines):
400
+ # Look for lines starting with an index number followed by spaces
401
+ is_table = True
402
+
403
+ # Look for table-like structured output with multiple rows of similar format
404
+ if len(lines) >= 3:
405
+ # Sample a few lines to check for consistent structure
406
+ sample_lines = [lines[i] for i in range(min(len(lines), 5)) if i < len(lines) and lines[i].strip()]
407
+
408
+ # Check for consistent whitespace patterns
409
+ if len(sample_lines) >= 2:
410
+ # Get positions of whitespace groups in first line
411
+ whitespace_positions = []
412
+ for i, line in enumerate(sample_lines):
413
+ if not line.strip():
414
+ continue
415
+ positions = [m.start() for m in re.finditer(r'\s{2,}', line)]
416
+ if i == 0:
417
+ whitespace_positions = positions
418
+ elif len(positions) == len(whitespace_positions):
419
+ # Check if whitespace positions are roughly the same
420
+ is_similar = all(abs(pos - whitespace_positions[j]) <= 3
421
+ for j, pos in enumerate(positions)
422
+ if j < len(whitespace_positions))
423
+ if is_similar:
424
+ is_table = True
425
+
426
+ # 2. Contains common table indicators
427
+ if any(indicator in output_text.lower() for indicator in [
428
+ 'count', 'mean', 'std', 'min', 'max', '25%', '50%', '75%', # Summary stats
429
+ 'correlation', 'corr', # Correlation tables
430
+ 'coefficient', 'r-squared', 'p-value', # Regression tables
431
+ ]):
432
+ is_table = True
433
+
434
+ # 3. Has many decimal numbers (likely a data table)
435
+ if output_text.count('.') > 5 and len(lines) > 2:
436
+ is_table = True
437
+
438
+ # If we have detected a table, convert space-delimited to pipe-delimited format
439
+ if is_table:
440
+ # Convert the table to pipe-delimited format for better parsing in frontend
441
+ formatted_lines = []
442
+ for line in lines:
443
+ if not line.strip():
444
+ formatted_lines.append(line) # Keep empty lines
445
+ continue
446
+
447
+ # Split by multiple spaces and join with pipe delimiter
448
+ parts = re.split(r'\s{2,}', line.strip())
449
+ if parts:
450
+ formatted_lines.append(" | ".join(parts))
451
+ else:
452
+ formatted_lines.append(line)
453
+
454
+ # Use the pipe-delimited format
455
+ output_text = "\n".join(formatted_lines)
456
+
457
+ # Format and mark the output for table processing in UI
458
+ captured_outputs.append(f"<TABLE_START>\n{output_text}\n<TABLE_END>")
459
+ else:
460
+ captured_outputs.append(output_text)
461
+
462
+ # Also use original print for stdout capture
463
+ original_print(*args, **kwargs)
464
+
465
+ # Custom matplotlib capture function
466
+ def capture_matplotlib_chart():
467
+ """Capture current matplotlib figure as base64 encoded image"""
468
+ try:
469
+ fig = plt.gcf() # Get current figure
470
+ if fig.get_axes(): # Check if figure has any plots
471
+ buffer = BytesIO()
472
+ fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight',
473
+ facecolor='white', edgecolor='none')
474
+ buffer.seek(0)
475
+ img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
476
+ buffer.close()
477
+ plt.close(fig) # Close the figure to free memory
478
+ return img_base64
479
+ return None
480
+ except Exception:
481
+ return None
482
+
483
+ # Store original plt.show function
484
+ original_plt_show = plt.show
485
+
486
+ def custom_plt_show(*args, **kwargs):
487
+ """Custom plt.show that captures the chart instead of displaying it"""
488
+ img_base64 = capture_matplotlib_chart()
489
+ if img_base64:
490
+ matplotlib_outputs.append(img_base64)
491
+ # Don't call original show to prevent display
492
+
493
+ context = {
494
+ 'pd': pd,
495
+ 'px': px,
496
+ 'go': go,
497
+ 'plt': plt,
498
+ 'plotly': plotly,
499
+ '__builtins__': __builtins__,
500
+ '__import__': __import__,
501
+ 'sns': sns,
502
+ 'np': np,
503
+ 'json_outputs': [], # List to store multiple Plotly JSON outputs
504
+ 'matplotlib_outputs': [], # List to store matplotlib chart images as base64
505
+ 'print': enhanced_print # Replace print with our enhanced version
506
+ }
507
+
508
+ # Add matplotlib_outputs to local scope for the custom show function
509
+ matplotlib_outputs = context['matplotlib_outputs']
510
+
511
+ # Replace plt.show with our custom function
512
+ plt.show = custom_plt_show
513
+
514
+
515
+
516
+ # Modify code to store multiple JSON outputs
517
+ modified_code = re.sub(
518
+ r'(\w*_?)fig(\w*)\.show\(\)',
519
+ r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
520
+ modified_code
521
+ )
522
+
523
+ modified_code = re.sub(
524
+ r'(\w*_?)fig(\w*)\.to_html\(.*?\)',
525
+ r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
526
+ modified_code
527
+ )
528
+ # Remove reading the csv file if it's already in the context
529
+ modified_code = re.sub(r"df\s*=\s*pd\.read_csv\([\"\'].*?[\"\']\).*?(\n|$)", '', modified_code)
530
+
531
+ # Only match assignments at top level (not indented)
532
+ # 1. Remove 'df = pd.DataFrame()' if it's at the top level
533
+ modified_code = re.sub(
534
+ r"^df\s*=\s*pd\.DataFrame\(\s*\)\s*(#.*)?$",
535
+ '',
536
+ modified_code,
537
+ flags=re.MULTILINE
538
+ )
539
+
540
+
541
+ # Custom display function for DataFrames to show head + tail for large datasets
542
+ original_repr = pd.DataFrame.__repr__
543
+
544
+ def custom_df_repr(self):
545
+ if len(self) > 15:
546
+ # For large DataFrames, show first 10 and last 5 rows
547
+ head_part = self.head(10)
548
+ tail_part = self.tail(5)
549
+
550
+ head_str = head_part.__repr__()
551
+ tail_str = tail_part.__repr__()
552
+
553
+ # Extract just the data rows (skip the header from tail)
554
+ tail_lines = tail_str.split('\n')
555
+ tail_data = '\n'.join(tail_lines[1:]) # Skip header line
556
+
557
+ return f"{head_str}\n...\n{tail_data}"
558
+ else:
559
+ return original_repr(self)
560
+
561
+ # Apply custom representation temporarily
562
+ pd.DataFrame.__repr__ = custom_df_repr
563
+
564
+ # If a dataframe is provided, add it to the context
565
+ if dataframe is not None:
566
+ context['df'] = dataframe
567
+
568
+ # remove pd.read_csv() if it's already in the context
569
+ modified_code = re.sub(r"pd\.read_csv\(\s*[\"\'].*?[\"\']\s*\)", '', modified_code)
570
+
571
+ # Remove sample dataframe lines with multiple array values
572
+ modified_code = re.sub(r"^# Sample DataFrames?.*?(\n|$)", '', modified_code, flags=re.MULTILINE | re.IGNORECASE)
573
+
574
+ # Replace plt.savefig() calls with plt.show() to ensure plots are displayed
575
+ modified_code = re.sub(r'plt\.savefig\([^)]*\)', 'plt.show()', modified_code)
576
+
577
+ # Instead of removing plt.show(), keep them - they'll be handled by our custom function
578
+ # Also handle seaborn plots that might not have explicit plt.show()
579
+ # Add plt.show() after seaborn plot functions if not already present
580
+ seaborn_plot_functions = [
581
+ 'sns.scatterplot', 'sns.lineplot', 'sns.barplot', 'sns.boxplot', 'sns.violinplot',
582
+ 'sns.stripplot', 'sns.swarmplot', 'sns.pointplot', 'sns.catplot', 'sns.relplot',
583
+ 'sns.displot', 'sns.histplot', 'sns.kdeplot', 'sns.ecdfplot', 'sns.rugplot',
584
+ 'sns.distplot', 'sns.jointplot', 'sns.pairplot', 'sns.FacetGrid', 'sns.PairGrid',
585
+ 'sns.heatmap', 'sns.clustermap', 'sns.regplot', 'sns.lmplot', 'sns.residplot'
586
+ ]
587
+
588
+ # Add an automatic plt.show() after each seaborn plot call
589
+ for func in seaborn_plot_functions:
590
+ pattern = rf'({re.escape(func)}\([^)]*\)(?:\.[^(]*\([^)]*\))*)'
591
+ def add_show(match):
592
+ plot_call = match.group(1)
593
+ # Unconditionally append plt.show(); the custom show hook captures the figure
594
+ return f'{plot_call}\nplt.show()'
595
+
596
+ modified_code = re.sub(pattern, add_show, modified_code)
597
+
598
+ # Only add df = pd.read_csv() if no dataframe was provided and the code contains pd.read_csv
599
+ if dataframe is None and 'pd.read_csv' not in modified_code:
600
+ modified_code = re.sub(
601
+ r'import pandas as pd',
602
+ r'import pandas as pd\n\n# Read Housing.csv\ndf = pd.read_csv("Housing.csv")',
603
+ modified_code
604
+ )
605
+
606
+ # Identify code blocks by comments
607
+ code_blocks = []
608
+ current_block = []
609
+ current_block_name = "unknown"
610
+
611
+ for line in modified_code.splitlines():
612
+ # Check if line contains a block identifier comment
613
+ block_match = re.match(r'^# ([a-zA-Z_]+)_agent code start', line)
614
+ if block_match:
615
+ # If we had a previous block, save it
616
+ if current_block:
617
+ code_blocks.append((current_block_name, '\n'.join(current_block)))
618
+ # Start a new block
619
+ current_block_name = block_match.group(1)
620
+ current_block = []
621
+ else:
622
+ current_block.append(line)
623
+
624
+ # Add the last block if it exists
625
+ if current_block:
626
+ code_blocks.append((current_block_name, '\n'.join(current_block)))
627
+
628
+ # Execute each code block separately
629
+ all_outputs = []
630
+ for block_name, block_code in code_blocks:
631
+ try:
632
+ # Clear captured outputs for each block
633
+ captured_outputs.clear()
634
+
635
+ with stdoutIO() as s:
636
+ exec(block_code, context) # Execute the block
637
+
638
+ # Get both stdout and our enhanced captured outputs
639
+ stdout_output = s.getvalue()
640
+
641
+ # Combine outputs, preferring our enhanced format when available
642
+ if captured_outputs:
643
+ combined_output = '\n'.join(captured_outputs)
644
+ else:
645
+ combined_output = stdout_output
646
+
647
+ all_outputs.append((block_name, combined_output, None)) # None means no error
648
+ except Exception as e:
649
+ # Reset pandas options in case of error
650
+ pd.reset_option('display.max_columns')
651
+ pd.reset_option('display.max_rows')
652
+ pd.reset_option('display.width')
653
+ pd.reset_option('display.max_colwidth')
654
+ pd.reset_option('display.expand_frame_repr')
655
+
656
+ # Restore original DataFrame representation in case of error
657
+ pd.DataFrame.__repr__ = original_repr
658
+
659
+ # Restore original plt.show
660
+ plt.show = original_plt_show
661
+
662
+ error_traceback = traceback.format_exc()
663
+
664
+ # Extract error message and error type
665
+ error_message = str(e)
666
+ error_type = type(e).__name__
667
+ error_lines = error_traceback.splitlines()
668
+
669
+ # Format error with context of the actual code
670
+ formatted_error = f"Error in {block_name}_agent: {error_message}\n"
671
+
672
+ # Add first few lines of traceback
673
+ first_lines = error_lines[:3]
674
+ formatted_error += "\n".join(first_lines) + "\n"
675
+
676
+ # Parse problem variables/values from the error message
677
+ problem_vars = []
678
+
679
+ # Look for common error patterns
680
+ if "not in index" in error_message:
681
+ # Extract column names for 'not in index' errors
682
+ column_match = re.search(r"\['([^']+)'(?:, '([^']+)')*\] not in index", error_message)
683
+ if column_match:
684
+ problem_vars = [g for g in column_match.groups() if g is not None]
685
+
686
+ # Look for DataFrame accessing operations and list/variable definitions
687
+ potential_lines = []
688
+ code_lines = block_code.splitlines()
689
+
690
+ # First, find all DataFrame column access patterns
691
+ df_access_patterns = []
692
+ for i, line in enumerate(code_lines):
693
+ # Find DataFrame variables from patterns like "df_name[...]" or "df_name.loc[...]"
694
+ df_matches = re.findall(r'(\w+)(?:\[|\.)(?:loc|iloc|columns|at|iat|\.select)', line)
695
+ for df_var in df_matches:
696
+ df_access_patterns.append((i, df_var))
697
+
698
+ # Find variables that might contain column lists
699
+ for var in problem_vars:
700
+ if re.search(r'\b(numeric_columns|categorical_columns|columns|features|cols)\b', line):
701
+ potential_lines.append(i)
702
+
703
+ # Identify the most likely problematic lines
704
+ if df_access_patterns:
705
+ for i, df_var in df_access_patterns:
706
+ if any(re.search(rf'{df_var}\[.*?\]', line) for line in code_lines):
707
+ potential_lines.append(i)
708
+
709
+ # If no specific lines found yet, look for any DataFrame operations
710
+ if not potential_lines:
711
+ for i, line in enumerate(code_lines):
712
+ if re.search(r'(?:corr|drop|groupby|pivot|merge|join|concat|apply|map|filter|loc|iloc)\(', line):
713
+ potential_lines.append(i)
714
+
715
+ # Sort and deduplicate
716
+ potential_lines = sorted(set(potential_lines))
717
+ elif "name" in error_message and "is not defined" in error_message:
718
+ # Extract variable name for NameError
719
+ var_match = re.search(r"name '([^']+)' is not defined", error_message)
720
+ if var_match:
721
+ problem_vars = [var_match.group(1)]
722
+ elif "object has no attribute" in error_message:
723
+ # Extract attribute name for AttributeError
724
+ attr_match = re.search(r"'([^']+)' object has no attribute '([^']+)'", error_message)
725
+ if attr_match:
726
+ problem_vars = [f"{attr_match.group(1)}.{attr_match.group(2)}"]
727
+
728
+ # Scan code for lines containing the problem variables
729
+ if problem_vars:
730
+ formatted_error += "\nProblem likely in these lines:\n"
731
+ code_lines = block_code.splitlines()
732
+ problem_lines = []
733
+
734
+ # First try direct variable references
735
+ direct_matches = False
736
+ for i, line in enumerate(code_lines):
737
+ if any(var in line for var in problem_vars):
738
+ direct_matches = True
739
+ # Get line and its context (1 line before and after)
740
+ start_idx = max(0, i-1)
741
+ end_idx = min(len(code_lines), i+2)
742
+
743
+ for j in range(start_idx, end_idx):
744
+ line_prefix = f"{j+1}: "
745
+ if j == i: # The line with the problem variable
746
+ problem_lines.append(f"{line_prefix}>>> {code_lines[j]} <<<")
747
+ else:
748
+ problem_lines.append(f"{line_prefix}{code_lines[j]}")
749
+
750
+ problem_lines.append("") # Empty line between sections
751
+
752
+ # If no direct matches found but we identified potential problematic lines for DataFrame issues
753
+ if not direct_matches and "not in index" in error_message and 'potential_lines' in locals():
754
+ for i in potential_lines:
755
+ start_idx = max(0, i-1)
756
+ end_idx = min(len(code_lines), i+2)
757
+
758
+ for j in range(start_idx, end_idx):
759
+ line_prefix = f"{j+1}: "
760
+ if j == i:
761
+ problem_lines.append(f"{line_prefix}>>> {code_lines[j]} <<<")
762
+ else:
763
+ problem_lines.append(f"{line_prefix}{code_lines[j]}")
764
+
765
+ problem_lines.append("") # Empty line between sections
766
+
767
+ if problem_lines:
768
+ formatted_error += "\n".join(problem_lines)
769
+ else:
770
+ # Special message for column errors when we can't find the exact reference
771
+ if "not in index" in error_message:
772
+ formatted_error += (f"Unable to locate direct reference to columns: {', '.join(problem_vars)}\n"
773
+ f"Check for variables that might contain these column names (like numeric_columns, "
774
+ f"categorical_columns, etc.)\n")
775
+ else:
776
+ formatted_error += f"Unable to locate lines containing: {', '.join(problem_vars)}\n"
777
+ else:
778
+ # If we couldn't identify specific variables, check for line numbers in traceback
779
+ for line in reversed(error_lines): # Search from the end of traceback
780
+ # Look for user code references in the traceback
781
+ if ', line ' in line and '<module>' in line:
782
+ try:
783
+ line_num = int(re.search(r', line (\d+)', line).group(1))
784
+ code_lines = block_code.splitlines()
785
+ if 0 < line_num <= len(code_lines):
786
+ line_idx = line_num - 1
787
+ start_idx = max(0, line_idx-2)
788
+ end_idx = min(len(code_lines), line_idx+3)
789
+
790
+ formatted_error += "\nProblem at this location:\n"
791
+ for i in range(start_idx, end_idx):
792
+ line_prefix = f"{i+1}: "
793
+ if i == line_idx:
794
+ formatted_error += f"{line_prefix}>>> {code_lines[i]} <<<\n"
795
+ else:
796
+ formatted_error += f"{line_prefix}{code_lines[i]}\n"
797
+ break
798
+ except (ValueError, AttributeError, IndexError):
799
+ pass
800
+
801
+ # Add the last few lines of the traceback
802
+ formatted_error += "\nFull error details:\n"
803
+ last_lines = error_lines[-3:]
804
+ formatted_error += "\n".join(last_lines)
805
+
806
+ all_outputs.append((block_name, None, formatted_error))
807
+
808
+ # Reset pandas options after execution
809
+ pd.reset_option('display.max_columns')
810
+ pd.reset_option('display.max_rows')
811
+ pd.reset_option('display.width')
812
+ pd.reset_option('display.max_colwidth')
813
+ pd.reset_option('display.expand_frame_repr')
814
+
815
+ # Restore original DataFrame representation
816
+ pd.DataFrame.__repr__ = original_repr
817
+
818
+ # Restore original plt.show
819
+ plt.show = original_plt_show
820
+
821
+ # Compile all outputs and errors
822
+ output_text = ""
823
+ json_outputs = context.get('json_outputs', [])
824
+ matplotlib_outputs = context.get('matplotlib_outputs', [])
825
+ error_found = False
826
+
827
+ for block_name, output, error in all_outputs:
828
+ if error:
829
+ output_text += f"\n\n=== ERROR IN {block_name.upper()}_AGENT ===\n{error}\n"
830
+ error_found = True
831
+ elif output:
832
+ output_text += f"\n\n=== OUTPUT FROM {block_name.upper()}_AGENT ===\n{output}\n"
833
+
834
+ if error_found:
835
+ return output_text, [], []
836
+ else:
837
+ return output_text, json_outputs, matplotlib_outputs
838
+
839
+
840
+ def format_plan_instructions(plan_instructions):
841
+ """
842
+ Format any plan instructions (JSON string or dict) into markdown sections per agent.
843
+ """
844
+ # Parse input into a dict
845
+
846
+ if "basic_qa_agent" in str(plan_instructions):
847
+ return "**Non-Data Request**: Please ask a data-related query; don't waste credits!"
848
+
849
+
850
+ try:
851
+ if isinstance(plan_instructions, str):
852
+ try:
853
+ instructions = json.loads(plan_instructions)
854
+ except json.JSONDecodeError as e:
855
+ # Try to clean the string if it's not valid JSON
856
+ cleaned_str = plan_instructions.strip()
857
+ if cleaned_str.startswith("'") and cleaned_str.endswith("'"):
858
+ cleaned_str = cleaned_str[1:-1]
859
+ try:
860
+ instructions = json.loads(cleaned_str)
861
+ except json.JSONDecodeError:
862
+ raise ValueError(f"Invalid JSON format in plan instructions: {str(e)}")
863
+ elif isinstance(plan_instructions, dict):
864
+ instructions = plan_instructions
865
+ else:
866
+ raise TypeError(f"Unsupported plan instructions type: {type(plan_instructions)}")
867
+ except Exception as e:
868
+ raise ValueError(f"Error processing plan instructions: {str(e)} (lm: {dspy.settings.lm})")
869
+ # logger.log_message(f"Plan instructions: {instructions}", level=logging.INFO)
870
+
871
+
872
+
873
+ markdown_lines = []
874
+ for agent, content in instructions.items():
875
+ if agent != 'basic_qa_agent':
876
+ agent_title = agent.replace('_', ' ').title()
877
+ markdown_lines.append(f"#### {agent_title}")
878
+ if isinstance(content, dict):
879
+ # Handle 'create' key
880
+ create_vals = content.get('create', [])
881
+ if create_vals:
882
+ markdown_lines.append(f"- **Create**:")
883
+ for item in create_vals:
884
+ markdown_lines.append(f" - {item}")
885
+ else:
886
+ markdown_lines.append(f"- **Create**: None")
887
+
888
+ # Handle 'use' key
889
+ use_vals = content.get('use', [])
890
+ if use_vals:
891
+ markdown_lines.append(f"- **Use**:")
892
+ for item in use_vals:
893
+ markdown_lines.append(f" - {item}")
894
+ else:
895
+ markdown_lines.append(f"- **Use**: None")
896
+
897
+ # Handle 'instruction' key
898
+ instr = content.get('instruction')
899
+ if isinstance(instr, str) and instr:
900
+ markdown_lines.append(f"- **Instruction**: {instr}")
901
+ else:
902
+ markdown_lines.append(f"- **Instruction**: None")
903
+ else:
904
+ # Fallback for non-dict content
905
+ markdown_lines.append(f"- {content}")
906
+ markdown_lines.append("") # blank line between agents
907
+ else:
908
+ markdown_lines.append(f"**Non-Data Request**: {content.get('instruction')}")
909
+
910
+ return "\n".join(markdown_lines).strip()
911
+
912
+
913
+ def format_complexity(instructions):
914
+ markdown_lines = []
+ complexity = None  # default so the check below never hits an unbound name
915
+ # Extract complexity from various possible locations in the structure
916
+ if isinstance(instructions, dict):
917
+ # Case 1: Direct complexity field
918
+ if 'complexity' in instructions:
919
+ complexity = instructions['complexity']
920
+ # Case 2: Complexity in 'plan' object
921
+ elif 'plan' in instructions and isinstance(instructions['plan'], dict):
922
+ if 'complexity' in instructions['plan']:
923
+ complexity = instructions['plan']['complexity']
924
+ else:
925
+ complexity = "unrelated"
926
+
927
+ if 'plan' in instructions and isinstance(instructions['plan'], str) and "basic_qa_agent" in instructions['plan']:
928
+ complexity = "unrelated"
929
+
930
+ if complexity:
931
+ # Pink color scheme variations
932
+ color_map = {
933
+ "unrelated": "#FFB6B6", # Light pink
934
+ "basic": "#FF9E9E", # Medium pink
935
+ "intermediate": "#FF7F7F", # Main pink
936
+ "advanced": "#FF5F5F" # Dark pink
937
+ }
938
+
939
+ indicator_map = {
940
+ "unrelated": "○",
941
+ "basic": "●",
942
+ "intermediate": "●●",
943
+ "advanced": "●●●"
944
+ }
945
+
946
+ color = color_map.get(complexity.lower(), "#FFB6B6") # Default to light pink
947
+ indicator = indicator_map.get(complexity.lower(), "○")
948
+
949
+ # Slightly larger display with pink styling
950
+ markdown_lines.append(f"<div style='color: {color}; border: 2px solid {color}; padding: 2px 8px; border-radius: 12px; display: inline-block; font-size: 14.4px;'>{indicator} {complexity}</div>\n")
951
+
952
+ return "\n".join(markdown_lines).strip()
953
+
954
+
955
+ def format_response_to_markdown(api_response, agent_name = None, dataframe=None):
956
+ try:
957
+ markdown = []
958
+ # logger.log_message(f"API response for {agent_name} at {time.strftime('%Y-%m-%d %H:%M:%S')}: {api_response}", level=logging.INFO)
959
+
960
+ if isinstance(api_response, dict):
961
+ for key in api_response:
962
+ if isinstance(api_response[key], dict) and "error" in api_response[key] and "litellm.ratelimiterror" in str(api_response[key]['error']).lower():
963
+ return "**Error**: Rate limit exceeded. Please try switching models from the settings."
964
+ # You can add more checks here if needed for other keys
965
+
966
+ # Handle error responses
967
+ if isinstance(api_response, dict) and "error" in api_response:
968
+ return f"**Error**: {api_response['error']}"
969
+ if "response" in api_response and isinstance(api_response['response'], str):
970
+ if any(err in api_response['response'].lower() for err in ["auth", "api", "lm"]):
971
+ return "**Error**: Authentication failed. Please check your API key in settings and try again."
972
+ if "model" in api_response['response'].lower():
973
+ return "**Error**: Model configuration error. Please verify your model selection in settings."
974
+
975
+ for agent, content in api_response.items():
976
+ agent = agent.split("__")[0] if "__" in agent else agent
977
+ if "memory" in agent or not content:
978
+ continue
979
+
980
+ if "complexity" in content:
981
+ markdown.append(f"{format_complexity(content)}\n")
982
+
983
+ markdown.append(f"\n## {agent.replace('_', ' ').title()}\n")
984
+
985
+ if agent == "analytical_planner":
986
+ logger.log_message(f"Analytical planner content: {content}", level=logging.INFO)
987
+ if 'plan_desc' in content:
988
+ markdown.append(f"### Reasoning\n{content['plan_desc']}\n")
989
+ if 'plan_instructions' in content:
990
+ markdown.append(f"{format_plan_instructions(content['plan_instructions'])}\n")
991
+ else:
992
+ markdown.append(f"### Reasoning\n{content['rationale']}\n")
993
+ else:
994
+ if "rationale" in content:
995
+ markdown.append(f"### Reasoning\n{content['rationale']}\n")
996
+
997
+ if 'code' in content and content['code'] is not None:
998
+ markdown.append(f"### Code Implementation\n{format_code_backticked_block(content['code'])}\n")
999
+ if 'answer' in content:
1000
+ markdown.append(f"### Answer\n{content['answer']}\n\nPlease ask a query about the data.")
1001
+ if 'summary' in content:
1002
+ import re
1003
+ summary_text = content['summary']
1004
+ summary_text = re.sub(r'```python\n(.*?)\n```', '', summary_text, flags=re.DOTALL)
1005
+
1006
+ markdown.append("### Summary\n")
1007
+
1008
+ # Extract pre-list intro, bullet points, and post-list text
1009
+ intro_match = re.split(r'\(\d+\)', summary_text, maxsplit=1)
1010
+ if len(intro_match) > 1:
1011
+ intro_text = intro_match[0].strip()
1012
+ rest_text = "(1)" + intro_match[1] # reattach for bullet parsing
1013
+ else:
1014
+ intro_text = summary_text.strip()
1015
+ rest_text = ""
1016
+
1017
+ if intro_text:
1018
+ markdown.append(f"{intro_text}\n")
1019
+
1020
+ # Split bullets at numbered items like (1)...(8)
1021
+ bullets = re.split(r'\(\d+\)', rest_text)
1022
+ bullets = [b.strip(" ,.\n") for b in bullets if b.strip()]
1023
+
1024
+ # Render each extracted numbered item as a bullet point
1025
+ for i, bullet in enumerate(bullets):
1026
+ markdown.append(f"* {bullet}\n")
1027
+
1031
+ if 'refined_complete_code' in content and 'summary' in content:
1032
+ # Initialize defaults so later references are safe if neither branch below runs
+ markdown_code = None
+ output = None
+ json_outputs = []
+ matplotlib_outputs = []
+ try:
1033
+ if content['refined_complete_code'] is not None and content['refined_complete_code'] != "":
1034
+ clean_code = format_code_block(content['refined_complete_code'])
1035
+ markdown_code = format_code_backticked_block(content['refined_complete_code'])
1036
+ output, json_outputs, matplotlib_outputs = execute_code_from_markdown(clean_code, dataframe)
1037
+ elif "```python" in content['summary']:
1038
+ clean_code = format_code_block(content['summary'])
1039
+ markdown_code = format_code_backticked_block(content['summary'])
1040
+ output, json_outputs, matplotlib_outputs = execute_code_from_markdown(clean_code, dataframe)
1041
+ except Exception as e:
1042
+ logger.log_message(f"Error in execute_code_from_markdown: {str(e)}", level=logging.ERROR)
1043
+ markdown_code = f"**Error**: {str(e)}"
1044
+ output = None
1045
+ json_outputs = []
1046
+ matplotlib_outputs = []
1047
+ # continue
1048
+
1049
+ if markdown_code is not None:
1050
+ markdown.append(f"### Refined Complete Code\n{markdown_code}\n")
1051
+
1052
+ if output:
1053
+ markdown.append("### Execution Output\n")
1054
+ markdown.append(f"```output\n{output}\n```\n")
1055
+
1056
+ if json_outputs:
1057
+ markdown.append("### Plotly JSON Outputs\n")
1058
+ for idx, json_output in enumerate(json_outputs):
1059
+ markdown.append(f"```plotly\n{json_output}\n```\n")
1060
+
1061
+ if matplotlib_outputs:
1062
+ markdown.append("### Matplotlib/Seaborn Charts\n")
1063
+ for idx, img_base64 in enumerate(matplotlib_outputs):
1064
+ markdown.append(f"```matplotlib\n{img_base64}\n```\n")
1065
+ # if agent_name is not None:
1066
+ # if f"memory_{agent_name}" in api_response:
1067
+ # markdown.append(f"### Memory\n{api_response[f'memory_{agent_name}']}\n")
1068
+
1069
+ except Exception as e:
1070
+ logger.log_message(f"Error in format_response_to_markdown: {str(e)}", level=logging.ERROR)
1071
+ return f"**Error**: failed to format markdown: {str(e)}"
1072
+
1073
+ # logger.log_message(f"Generated markdown content for agent '{agent_name}' at {time.strftime('%Y-%m-%d %H:%M:%S')}: {markdown}, length: {len(markdown)}", level=logging.INFO)
1074
+
1075
+ if not markdown or len(markdown) <= 1:
1076
+ logger.log_message(
1077
+ f"Invalid markdown content for agent '{agent_name}' at {time.strftime('%Y-%m-%d %H:%M:%S')}: "
1078
+ f"Content: '{markdown}', Type: {type(markdown)}, Length: {len(markdown) if markdown else 0}, "
1079
+ f"API Response: {api_response}",
1080
+ level=logging.ERROR
1081
+ )
1082
+ return ""
1083
+
1084
+ return '\n'.join(markdown)
1085
+
1086
+
1087
+ # Example usage with dummy data
1088
+ if __name__ == "__main__":
1089
+ sample_response = {
1090
+ "code_combiner_agent": {
1091
+ "reasoning": "Sample reasoning for multiple charts.",
1092
+ "refined_complete_code": """
1093
+ ```python
1094
+ import plotly.express as px
1095
+ import pandas as pd
1096
+
1097
+ # Sample Data
1098
+ df = pd.DataFrame({'Category': ['A', 'B', 'C'], 'Values': [10, 20, 30]})
1099
+
1100
+ # First Chart
1101
+ fig = px.bar(df, x='Category', y='Values', title='Bar Chart')
1102
+ fig.show()
1103
+
1104
+ # Second Chart
1105
+ fig2 = px.pie(df, values='Values', names='Category', title='Pie Chart')
1106
+ fig2.show()
1107
+ ```
1108
+ """
1109
+ }
1110
+ }
1111
+
1112
+ formatted_md = format_response_to_markdown(sample_response)
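
A minimal usage sketch for `format_plan_instructions`, assuming the planner emits the `create`/`use`/`instruction` keys the function reads above; the plan dict and its values here are hypothetical:

```python
# Hypothetical planner output, shaped like the per-agent dicts the function expects.
sample_plan = {
    "preprocessing_agent": {
        "create": ["df_clean"],
        "use": ["df"],
        "instruction": "Drop rows with missing prices and parse date columns."
    },
    "data_viz_agent": {
        "create": [],
        "use": ["df_clean"],
        "instruction": "Plot the price distribution by region."
    },
}

print(format_plan_instructions(sample_plan))
# Per the logic above, this renders a "#### Preprocessing Agent" section and a
# "#### Data Viz Agent" section, each with Create/Use/Instruction bullets.
```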
scripts/init_production_db.py ADDED
@@ -0,0 +1,191 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Production database initialization script.
4
+ This ensures templates are populated properly and verifies database health.
5
+ SAFE for PostgreSQL/RDS - only creates tables on SQLite databases.
6
+ """
7
+
8
+ import sys
9
+ import os
10
+ import logging
11
+ from datetime import datetime, UTC
12
+
13
+ # Add the project root to the Python path
14
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
15
+
16
+ from src.db.init_db import init_db, session_factory, engine, is_postgres_db
17
+ from src.db.schemas.models import Base, AgentTemplate, UserTemplatePreference
18
+ from scripts.populate_agent_templates import populate_templates
19
+ from sqlalchemy import inspect, text
20
+ from src.utils.logger import Logger
21
+
22
+ logger = Logger("init_production_db", see_time=True, console_log=True)
23
+
24
+ def get_database_type():
25
+ """Get the database type (sqlite or postgresql)."""
26
+ try:
27
+ if is_postgres_db():
28
+ return "postgresql"
29
+ else:
30
+ return "sqlite"
31
+ except Exception as e:
32
+ logger.log_message(f"Error determining database type: {e}", logging.ERROR)
33
+ return "unknown"
34
+
35
+ def check_table_exists(table_name: str) -> bool:
36
+ """Check if a table exists in the database."""
37
+ try:
38
+ inspector = inspect(engine)
39
+ tables = inspector.get_table_names()
40
+ return table_name in tables
41
+ except Exception as e:
42
+ logger.log_message(f"Error checking table existence: {e}", logging.ERROR)
43
+ return False
44
+
45
+ def verify_database_schema():
46
+ """Verify that all required tables exist. Only create tables on SQLite."""
47
+ db_type = get_database_type()
48
+ logger.log_message(f"🔍 Verifying database schema for {db_type.upper()} database...", logging.INFO)
49
+
50
+ required_tables = [
51
+ 'users', 'chats', 'messages', 'model_usage', 'code_executions',
52
+ 'message_feedback', 'deep_analysis_reports', 'agent_templates',
53
+ 'user_template_preferences'
54
+ ]
55
+
56
+ missing_tables = []
57
+ existing_tables = []
58
+
59
+ for table in required_tables:
60
+ if not check_table_exists(table):
61
+ missing_tables.append(table)
62
+ logger.log_message(f"❌ Missing table: {table}", logging.WARNING)
63
+ else:
64
+ existing_tables.append(table)
65
+ logger.log_message(f"✅ Table exists: {table}", logging.INFO)
66
+
67
+ if missing_tables:
68
+ if db_type == "sqlite":
69
+ logger.log_message(f"🔧 Creating missing tables on SQLite: {missing_tables}", logging.INFO)
70
+ try:
71
+ # Safe to create tables on SQLite
72
+ Base.metadata.create_all(engine)
73
+ logger.log_message("✅ All tables created successfully on SQLite", logging.INFO)
74
+ except Exception as e:
75
+ logger.log_message(f"❌ Failed to create tables: {e}", logging.ERROR)
76
+ raise
77
+ else:
78
+ # PostgreSQL/RDS - DO NOT create tables automatically
79
+ logger.log_message(f"⚠️ WARNING: Missing tables detected in {db_type.upper()} database: {missing_tables}", logging.WARNING)
80
+ logger.log_message("🛡️ SAFETY: Not creating tables automatically on PostgreSQL/RDS", logging.INFO)
81
+ logger.log_message("📋 Please ensure these tables exist in your RDS database:", logging.INFO)
82
+ for table in missing_tables:
83
+ logger.log_message(f" - {table}", logging.INFO)
84
+
85
+ # Continue without failing - the app might still work with existing tables
86
+ if 'agent_templates' in missing_tables or 'user_template_preferences' in missing_tables:
87
+ logger.log_message("⚠️ Template functionality may not work without agent_templates and user_template_preferences tables", logging.WARNING)
88
+ else:
89
+ logger.log_message(f"✅ All required tables exist in {db_type.upper()} database", logging.INFO)
90
+
91
+ def verify_template_data():
92
+ """Verify that agent templates are populated. Safe for all database types."""
93
+ logger.log_message("📋 Verifying template data...", logging.INFO)
94
+
95
+ session = session_factory()
96
+ try:
97
+ # Check if agent_templates table exists before querying
98
+ if not check_table_exists('agent_templates'):
99
+ logger.log_message("⚠️ agent_templates table does not exist, skipping template verification", logging.WARNING)
100
+ return
101
+
102
+ template_count = session.query(AgentTemplate).filter(AgentTemplate.is_active == True).count()
103
+ logger.log_message(f"📊 Found {template_count} active templates", logging.INFO)
104
+
105
+ if template_count == 0:
106
+ logger.log_message("🔧 No templates found, populating...", logging.INFO)
107
+ try:
108
+ populate_templates()
109
+
110
+ # Verify population worked
111
+ new_count = session.query(AgentTemplate).filter(AgentTemplate.is_active == True).count()
112
+ logger.log_message(f"✅ Templates populated. Total active templates: {new_count}", logging.INFO)
113
+ except Exception as e:
114
+ logger.log_message(f"❌ Template population failed: {e}", logging.ERROR)
115
+ logger.log_message("⚠️ App will continue but template functionality may not work", logging.WARNING)
116
+ else:
117
+ logger.log_message("✅ Templates already populated", logging.INFO)
118
+
119
+ except Exception as e:
120
+ logger.log_message(f"❌ Error verifying templates: {e}", logging.ERROR)
121
+ logger.log_message("⚠️ Template verification failed, but app will continue", logging.WARNING)
122
+ finally:
123
+ session.close()
124
+
125
+ def test_template_api_functionality():
126
+ """Test that template-related database operations work. Safe for all database types."""
127
+ logger.log_message("🧪 Testing template API functionality...", logging.INFO)
128
+
129
+ session = session_factory()
130
+ try:
131
+ # Check if agent_templates table exists before testing
132
+ if not check_table_exists('agent_templates'):
133
+ logger.log_message("⚠️ agent_templates table does not exist, skipping API test", logging.WARNING)
134
+ return
135
+
136
+ # Test basic template query
137
+ templates = session.query(AgentTemplate).filter(AgentTemplate.is_active == True).limit(5).all()
138
+ logger.log_message(f"✅ Successfully queried {len(templates)} templates", logging.INFO)
139
+
140
+ if templates:
141
+ sample_template = templates[0]
142
+ logger.log_message(f"📄 Sample template: {sample_template.template_name} - {sample_template.display_name}", logging.INFO)
143
+ else:
144
+ logger.log_message("📭 No templates found in database", logging.INFO)
145
+
146
+ except Exception as e:
147
+ logger.log_message(f"❌ Template API test failed: {e}", logging.ERROR)
148
+ logger.log_message("⚠️ Template API may not work properly", logging.WARNING)
149
+ finally:
150
+ session.close()
151
+
152
+ def run_safe_initialization():
153
+ """Run safe database initialization that respects production databases."""
154
+ db_type = get_database_type()
155
+ logger.log_message(f"🚀 Starting SAFE database initialization for {db_type.upper()}...", logging.INFO)
156
+
157
+ if db_type == "postgresql":
158
+ logger.log_message("🛡️ PostgreSQL/RDS detected - running in SAFE mode", logging.INFO)
159
+ logger.log_message("📋 Will only verify schema and populate templates", logging.INFO)
160
+ elif db_type == "sqlite":
161
+ logger.log_message("💽 SQLite detected - full initialization mode", logging.INFO)
162
+
163
+ try:
164
+ # Step 1: Initialize database (safe for all types)
165
+ logger.log_message("Step 1: Basic database initialization", logging.INFO)
166
+ if db_type == "sqlite":
167
+ init_db() # Only run full init on SQLite
168
+ else:
169
+ logger.log_message("Skipping init_db() for PostgreSQL (safety)", logging.INFO)
170
+
171
+ # Step 2: Verify schema (safe - only creates tables on SQLite)
172
+ logger.log_message("Step 2: Schema verification", logging.INFO)
173
+ verify_database_schema()
174
+
175
+ # Step 3: Verify template data (safe for all types)
176
+ logger.log_message("Step 3: Template data verification", logging.INFO)
177
+ verify_template_data()
178
+
179
+ # Step 4: Test functionality (safe for all types)
180
+ logger.log_message("Step 4: Functionality testing", logging.INFO)
181
+ test_template_api_functionality()
182
+
183
+ logger.log_message(f"🎉 Safe database initialization completed for {db_type.upper()}!", logging.INFO)
184
+
185
+ except Exception as e:
186
+ logger.log_message(f"💥 Database initialization failed: {e}", logging.ERROR)
187
+ logger.log_message("⚠️ App may still start but some features might not work", logging.WARNING)
188
+ # Don't raise - let the app try to start anyway
189
+
190
+ if __name__ == "__main__":
191
+ run_safe_initialization()
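
A sketch of driving the safe initializer programmatically rather than as a script; it assumes the working directory is the backend root so `scripts` and `src` are importable, exactly as the `sys.path` setup above expects:

```python
# Equivalent to `python scripts/init_production_db.py`, run from the backend root.
from scripts.init_production_db import get_database_type, run_safe_initialization

print(f"Detected database type: {get_database_type()}")
run_safe_initialization()  # full init on SQLite; verification-only on PostgreSQL/RDS
```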
scripts/populate_agent_templates.py ADDED
@@ -0,0 +1,508 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SQLite Agent Template Management Script
4
+ Similar to manage_templates.py but optimized for local SQLite development.
5
+ Reads agents from agents_config.json and manages SQLite database.
6
+ """
7
+
8
+ import sys
9
+ import os
10
+ import json
11
+ import requests
12
+ from datetime import datetime, UTC
13
+ from pathlib import Path
14
+
15
+ # Add the project root to the Python path
16
+ script_dir = os.path.dirname(os.path.abspath(__file__))
17
+ backend_dir = os.path.dirname(script_dir)
18
+ project_root = os.path.dirname(os.path.dirname(backend_dir))
19
+
20
+ # Change to backend directory to ensure proper path resolution
21
+ os.chdir(backend_dir)
22
+ sys.path.append(backend_dir)
23
+
24
+ from src.db.init_db import session_factory, DATABASE_URL
25
+ from src.db.schemas.models import AgentTemplate
26
+ from sqlalchemy.exc import IntegrityError
27
+
28
+ def get_database_type():
29
+ """Detect database type from DATABASE_URL"""
30
+ if DATABASE_URL.startswith('postgresql'):
31
+ return "postgresql"
32
+ elif DATABASE_URL.startswith('sqlite'):
33
+ return "sqlite"
34
+ else:
35
+ return "unknown"
36
+
37
+ def load_agents_config():
38
+ """Load agents configuration from agents_config.json"""
39
+ # Try multiple possible locations for agents_config.json
40
+ possible_paths = [
41
+ os.path.join(backend_dir, 'agents_config.json'), # Backend directory (copied file)
42
+ os.path.join(project_root, 'agents_config.json'), # Project root
43
+ '/app/agents_config.json', # Container root (HF Spaces)
44
+ 'agents_config.json' # Current directory
45
+ ]
46
+
47
+ config_path = None
48
+ for path in possible_paths:
49
+ if os.path.exists(path):
50
+ config_path = path
51
+ print(f"📖 Found agents_config.json at: {config_path}")
52
+ break
53
+
54
+ if not config_path:
55
+ paths_str = '\n '.join(possible_paths)
56
+ raise FileNotFoundError(f"agents_config.json not found in any of these locations:\n {paths_str}")
57
+
58
+ with open(config_path, 'r', encoding='utf-8') as f:
59
+ config = json.load(f)
60
+
61
+ return config.get('templates', [])
62
+
63
+ def download_icon(icon_url, template_name):
64
+ """Download icon from URL and save to frontend directory"""
65
+ if not icon_url or not icon_url.startswith('http'):
66
+ print(f"⏭️ Skipping icon download for {template_name} (not a URL: {icon_url})")
67
+ return icon_url
68
+
69
+ try:
70
+ # Determine frontend directory
71
+ frontend_dir = os.path.join(project_root, 'Auto-Analyst-CS', 'auto-analyst-frontend')
72
+ public_dir = os.path.join(frontend_dir, 'public')
73
+
74
+ if not os.path.exists(public_dir):
75
+ print(f"⚠️ Frontend public directory not found: {public_dir}")
76
+ return icon_url
77
+
78
+ # Parse the path from icon_url
79
+ if '/icons/templates/' in icon_url:
80
+ relative_path = icon_url.split('/icons/templates/')[-1]
81
+ icon_dir = os.path.join(public_dir, 'icons', 'templates')
82
+ else:
83
+ # Fallback: use filename from URL
84
+ filename = icon_url.split('/')[-1]
85
+ if not filename.endswith(('.svg', '.png', '.jpg', '.jpeg')):
86
+ filename += '.svg'
87
+ relative_path = filename
88
+ icon_dir = os.path.join(public_dir, 'icons', 'templates')
89
+
90
+ # Create icon directory if it doesn't exist
91
+ os.makedirs(icon_dir, exist_ok=True)
92
+
93
+ # Download and save icon
94
+ icon_path = os.path.join(icon_dir, relative_path)
95
+
96
+ # Skip if already exists
97
+ if os.path.exists(icon_path):
98
+ print(f"📁 Icon already exists: {relative_path}")
99
+ return f"/icons/templates/{relative_path}"
100
+
101
+ response = requests.get(icon_url, timeout=10)
102
+ response.raise_for_status()
103
+
104
+ with open(icon_path, 'wb') as f:
105
+ f.write(response.content)
106
+
107
+ print(f"📥 Downloaded icon: {relative_path}")
108
+ return f"/icons/templates/{relative_path}"
109
+
110
+ except Exception as e:
111
+ print(f"❌ Failed to download icon for {template_name}: {str(e)}")
112
+ return icon_url
113
+
114
+ def sync_agents_from_config():
115
+ """Synchronize agents from agents_config.json to SQLite database"""
116
+ session = session_factory()
117
+ db_type = get_database_type()
118
+
119
+ # if db_type != "sqlite":
120
+ # print(f"⚠️ This script is designed for SQLite, but detected {db_type}")
121
+ # print("Consider using manage_templates.py for PostgreSQL")
122
+ # return
123
+
124
+ try:
125
+ # Load configuration
126
+ print(f"📖 Loading agents from agents_config.json...")
127
+ templates_config = load_agents_config()
128
+
129
+ if not templates_config:
130
+ print("❌ No templates found in agents_config.json")
131
+ return
132
+
133
+ # Track statistics
134
+ created_count = 0
135
+ updated_count = 0
136
+ skipped_count = 0
137
+
138
+ print(f"🔍 Processing {len(templates_config)} templates for SQLite database")
139
+ print(f"📋 Database URL: {DATABASE_URL}")
140
+
141
+ # Group templates by category for display
142
+ categories = {}
143
+ for template_data in templates_config:
144
+ category = template_data.get('category', 'Uncategorized')
145
+ if category not in categories:
146
+ categories[category] = []
147
+ categories[category].append(template_data)
148
+
149
+ # Process templates by category
150
+ for category, templates in categories.items():
151
+ print(f"\n📁 {category}:")
152
+
153
+ for template_data in templates:
154
+ template_name = template_data["template_name"]
155
+
156
+ # Check if template already exists
157
+ existing = session.query(AgentTemplate).filter(
158
+ AgentTemplate.template_name == template_name
159
+ ).first()
160
+
161
+ # Download icon if it's a URL
162
+ icon_url = template_data.get("icon_url", "")
163
+ if icon_url.startswith('http'):
164
+ icon_url = download_icon(icon_url, template_name)
165
+
166
+ if existing:
167
+ # Update existing template
168
+ existing.display_name = template_data["display_name"]
169
+ existing.description = template_data["description"]
170
+ existing.icon_url = icon_url
171
+ existing.prompt_template = template_data["prompt_template"]
172
+ existing.category = template_data.get("category", "Uncategorized")
173
+ existing.is_premium_only = template_data.get("is_premium_only", False)
174
+ existing.is_active = template_data.get("is_active", True)
175
+ existing.variant_type = template_data.get("variant_type", "individual")
176
+ existing.base_agent = template_data.get("base_agent", template_name)
177
+ existing.updated_at = datetime.now(UTC)
178
+
179
+ variant_icon = "🤖" if template_data.get("variant_type") == "planner" else "👤"
180
+ premium_icon = "🔒" if template_data.get("is_premium_only") else "🆓"
181
+ print(f"🔄 Updated: {template_name} {variant_icon} {premium_icon}")
182
+ updated_count += 1
183
+ else:
184
+ # Create new template
185
+ template = AgentTemplate(
186
+ template_name=template_name,
187
+ display_name=template_data["display_name"],
188
+ description=template_data["description"],
189
+ icon_url=icon_url,
190
+ prompt_template=template_data["prompt_template"],
191
+ category=template_data.get("category", "Uncategorized"),
192
+ is_premium_only=template_data.get("is_premium_only", False),
193
+ is_active=template_data.get("is_active", True),
194
+ variant_type=template_data.get("variant_type", "individual"),
195
+ base_agent=template_data.get("base_agent", template_name),
196
+ created_at=datetime.now(UTC),
197
+ updated_at=datetime.now(UTC)
198
+ )
199
+
200
+ session.add(template)
201
+ variant_icon = "🤖" if template_data.get("variant_type") == "planner" else "👤"
202
+ premium_icon = "🔒" if template_data.get("is_premium_only") else "🆓"
203
+ print(f"✅ Created: {template_name} {variant_icon} {premium_icon}")
204
+ created_count += 1
205
+
206
+ # Handle removals if specified in config
207
+ remove_list = []
208
+ # Re-load the full config to check for removals
209
+ try:
210
+ full_config_path = None
211
+ possible_paths = [
212
+ os.path.join(backend_dir, 'agents_config.json'),
213
+ os.path.join(project_root, 'agents_config.json'),
214
+ '/app/agents_config.json',
215
+ 'agents_config.json'
216
+ ]
217
+
218
+ for path in possible_paths:
219
+ if os.path.exists(path):
220
+ full_config_path = path
221
+ break
222
+
223
+ if full_config_path:
224
+ with open(full_config_path, 'r', encoding='utf-8') as f:
225
+ full_config = json.load(f)
226
+ if 'remove' in full_config:
227
+ remove_list = full_config['remove']
228
+ except Exception as e:
229
+ print(f"⚠️ Could not load removal list: {e}")
230
+
231
+ # Remove templates marked for removal
232
+ if remove_list:
233
+ print(f"\n🗑️ --- Processing Removals ---")
234
+ for template_name in remove_list:
235
+ existing = session.query(AgentTemplate).filter(
236
+ AgentTemplate.template_name == template_name
237
+ ).first()
238
+
239
+ if existing:
240
+ session.delete(existing)
241
+ print(f"🗑️ Removed: {template_name}")
242
+ else:
243
+ print(f"⏭️ Skipping removal: {template_name} (not found)")
244
+
245
+ # Commit all changes
246
+ session.commit()
247
+
248
+ print(f"\n📊 --- Summary ---")
249
+ print(f"✅ Templates created: {created_count}")
250
+ print(f"🔄 Templates updated: {updated_count}")
251
+ print(f"⏭️ Templates skipped: {skipped_count}")
252
+
253
+ # Show total count in database
254
+ total_count = session.query(AgentTemplate).count()
255
+ free_count = session.query(AgentTemplate).filter(AgentTemplate.is_premium_only == False).count()
256
+ premium_count = session.query(AgentTemplate).filter(AgentTemplate.is_premium_only == True).count()
257
+ individual_count = session.query(AgentTemplate).filter(AgentTemplate.variant_type == 'individual').count()
258
+ planner_count = session.query(AgentTemplate).filter(AgentTemplate.variant_type == 'planner').count()
259
+
260
+ print(f"🗄️ Total templates in database: {total_count}")
261
+ print(f"🆓 Free templates: {free_count}")
262
+ print(f"🔒 Premium templates: {premium_count}")
263
+ print(f"👤 Individual variants: {individual_count}")
264
+ print(f"🤖 Planner variants: {planner_count}")
265
+
266
+ except Exception as e:
267
+ session.rollback()
268
+ print(f"❌ Error syncing templates: {str(e)}")
269
+ raise
270
+ finally:
271
+ session.close()
272
+
273
+ def list_templates():
274
+ """List all existing templates in the database"""
275
+ session = session_factory()
276
+
277
+ try:
278
+ templates = session.query(AgentTemplate).order_by(
279
+ AgentTemplate.category,
280
+ AgentTemplate.is_premium_only,
281
+ AgentTemplate.template_name
282
+ ).all()
283
+
284
+ if not templates:
285
+ print("No templates found in database.")
286
+ return
287
+
288
+ print(f"\n--- Existing Templates ({len(templates)} total) ---")
289
+
290
+ current_category = None
291
+ for template in templates:
292
+ if template.category != current_category:
293
+ current_category = template.category
294
+ print(f"\n📁 {current_category}:")
295
+
296
+ status = "🔒 Premium" if template.is_premium_only else "🆓 Free"
297
+ active = "✅ Active" if template.is_active else "❌ Inactive"
298
+ variant = getattr(template, 'variant_type', 'individual')
299
+ variant_icon = "🤖" if variant == "planner" else "👤"
300
+
301
+ print(f" • {template.template_name} ({template.display_name})")
302
+ print(f" {status} - {active} - {variant_icon} {variant}")
303
+ print(f" 📝 {template.description}")
304
+
305
+ except Exception as e:
306
+ print(f"❌ Error listing templates: {str(e)}")
307
+ finally:
308
+ session.close()
309
+
310
+ def remove_all_templates():
311
+ """Remove all templates from database (for testing)"""
312
+ session = session_factory()
313
+
314
+ try:
315
+ deleted_count = session.query(AgentTemplate).delete()
316
+ session.commit()
317
+ print(f"🗑️ Removed {deleted_count} templates from database")
318
+
319
+ except Exception as e:
320
+ session.rollback()
321
+ print(f"❌ Error removing templates: {str(e)}")
322
+ finally:
323
+ session.close()
324
+
325
+ def validate_config():
326
+ """Validate the agents_config.json structure"""
327
+ try:
328
+ templates_config = load_agents_config()
329
+
330
+ print(f"📋 Validating agents_config.json...")
331
+ print(f"✅ Found {len(templates_config)} templates")
332
+
333
+ # Check required fields
334
+ required_fields = ['template_name', 'display_name', 'description', 'prompt_template']
335
+ issues = []
336
+
337
+ for i, template in enumerate(templates_config):
338
+ for field in required_fields:
339
+ if field not in template:
340
+ issues.append(f"Template {i}: Missing required field '{field}'")
341
+
342
+ if issues:
343
+ print(f"❌ Validation issues found:")
344
+ for issue in issues:
345
+ print(f" • {issue}")
346
+ else:
347
+ print(f"✅ Configuration is valid")
348
+
349
+ # Show summary by category
350
+ categories = {}
351
+ for template in templates_config:
352
+ category = template.get('category', 'Uncategorized')
353
+ if category not in categories:
354
+ categories[category] = {'free': 0, 'premium': 0, 'individual': 0, 'planner': 0}
355
+
356
+ if template.get('is_premium_only', False):
357
+ categories[category]['premium'] += 1
358
+ else:
359
+ categories[category]['free'] += 1
360
+
361
+ if template.get('variant_type', 'individual') == 'planner':
362
+ categories[category]['planner'] += 1
363
+ else:
364
+ categories[category]['individual'] += 1
365
+
366
+ print(f"\n📊 Summary by category:")
367
+ for category, counts in categories.items():
368
+ total = counts['free'] + counts['premium']
369
+ print(f" 📁 {category}: {total} templates")
370
+ print(f" 🆓 Free: {counts['free']} | 🔒 Premium: {counts['premium']}")
371
+ print(f" 👤 Individual: {counts['individual']} | 🤖 Planner: {counts['planner']}")
372
+
373
+ except Exception as e:
374
+ print(f"❌ Error validating config: {str(e)}")
375
+
376
+ def create_minimal_templates():
377
+ """Create a minimal set of essential templates for container environments"""
378
+ session = session_factory()
379
+
380
+ try:
381
+ print("🔧 Creating minimal template set...")
382
+
383
+ # Define minimal essential templates
384
+ minimal_templates = [
385
+ {
386
+ "template_name": "preprocessing_agent",
387
+ "display_name": "Data Preprocessing Agent",
388
+ "description": "Cleans and prepares DataFrame using Pandas and NumPy",
389
+ "icon_url": "/icons/templates/preprocessing_agent.svg",
390
+ "category": "Data Manipulation",
391
+ "is_premium_only": False,
392
+ "variant_type": "individual",
393
+ "base_agent": "preprocessing_agent",
394
+ "is_active": True,
395
+ "prompt_template": "You are a preprocessing agent that cleans and prepares data using Pandas and NumPy. Handle missing values, detect column types, and convert date strings to datetime. Generate clean Python code for data preprocessing based on the user's analysis goals."
396
+ },
397
+ {
398
+ "template_name": "data_viz_agent",
399
+ "display_name": "Data Visualization Agent",
400
+ "description": "Creates interactive visualizations using Plotly",
401
+ "icon_url": "/icons/templates/data_viz_agent.svg",
402
+ "category": "Data Visualization",
403
+ "is_premium_only": False,
404
+ "variant_type": "individual",
405
+ "base_agent": "data_viz_agent",
406
+ "is_active": True,
407
+ "prompt_template": "You are a data visualization agent. Create interactive visualizations using Plotly based on user requirements. Generate appropriate chart types, apply styling, and ensure visualizations effectively communicate insights."
408
+ },
409
+ {
410
+ "template_name": "sk_learn_agent",
411
+ "display_name": "Machine Learning Agent",
412
+ "description": "Trains ML models using scikit-learn",
413
+ "icon_url": "/icons/templates/sk_learn_agent.svg",
414
+ "category": "Data Modelling",
415
+ "is_premium_only": False,
416
+ "variant_type": "individual",
417
+ "base_agent": "sk_learn_agent",
418
+ "is_active": True,
419
+ "prompt_template": "You are a machine learning agent. Use scikit-learn to train and evaluate ML models including classification, regression, and clustering. Provide feature importance insights and model performance metrics."
420
+ }
421
+ ]
422
+
423
+ created_count = 0
424
+
425
+ for template_data in minimal_templates:
426
+ template_name = template_data["template_name"]
427
+
428
+ # Check if template already exists
429
+ existing = session.query(AgentTemplate).filter(
430
+ AgentTemplate.template_name == template_name
431
+ ).first()
432
+
433
+ if not existing:
434
+ template = AgentTemplate(
435
+ template_name=template_name,
436
+ display_name=template_data["display_name"],
437
+ description=template_data["description"],
438
+ icon_url=template_data["icon_url"],
439
+ prompt_template=template_data["prompt_template"],
440
+ category=template_data["category"],
441
+ is_premium_only=template_data["is_premium_only"],
442
+ is_active=template_data["is_active"],
443
+ variant_type=template_data["variant_type"],
444
+ base_agent=template_data["base_agent"],
445
+ created_at=datetime.now(UTC),
446
+ updated_at=datetime.now(UTC)
447
+ )
448
+
449
+ session.add(template)
450
+ print(f"✅ Created minimal template: {template_name}")
451
+ created_count += 1
452
+ else:
453
+ print(f"⏭️ Template already exists: {template_name}")
454
+
455
+ session.commit()
456
+ print(f"📊 Created {created_count} minimal templates")
457
+
458
+ except Exception as e:
459
+ session.rollback()
460
+ print(f"❌ Error creating minimal templates: {str(e)}")
461
+ raise
462
+ finally:
463
+ session.close()
464
+
465
+ def populate_templates():
466
+ """Legacy compatibility function for backward compatibility"""
467
+ print("⚠️ Legacy populate_templates() called - checking for agents_config.json...")
468
+
469
+ # Check if agents_config.json exists anywhere
470
+ possible_paths = [
471
+ os.path.join(backend_dir, 'agents_config.json'),
472
+ os.path.join(project_root, 'agents_config.json'),
473
+ '/app/agents_config.json',
474
+ 'agents_config.json'
475
+ ]
476
+
477
+ config_exists = any(os.path.exists(path) for path in possible_paths)
478
+
479
+ if config_exists:
480
+ print("📖 Found agents_config.json - using sync_agents_from_config()")
481
+ sync_agents_from_config()
482
+ else:
483
+ print("⚠️ agents_config.json not found - using fallback minimal templates")
484
+ print("💡 Creating essential templates for container environment")
485
+ create_minimal_templates()
486
+
487
+ if __name__ == "__main__":
488
+ import argparse
489
+
490
+ parser = argparse.ArgumentParser(description="SQLite Agent Template Management")
491
+ parser.add_argument("action", choices=["sync", "list", "remove-all", "validate"],
492
+ help="Action to perform")
493
+
494
+ args = parser.parse_args()
495
+
496
+ if args.action == "sync":
497
+ print("🚀 Synchronizing agents from agents_config.json to SQLite...")
498
+ sync_agents_from_config()
499
+ elif args.action == "list":
500
+ list_templates()
501
+ elif args.action == "validate":
502
+ validate_config()
503
+ elif args.action == "remove-all":
504
+ confirm = input("⚠️ Are you sure you want to remove ALL templates? (yes/no): ")
505
+ if confirm.lower() == "yes":
506
+ remove_all_templates()
507
+ else:
508
+ print("Operation cancelled.")
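
A sketch of the typical workflow with this script, assuming `agents_config.json` is discoverable at one of the paths probed above; the calls mirror the `validate`, `sync`, and `list` CLI actions:

```python
# Same effect as running the script with "validate", then "sync", then "list".
from scripts.populate_agent_templates import (
    validate_config, sync_agents_from_config, list_templates
)

validate_config()          # check required fields before touching the database
sync_agents_from_config()  # create/update templates and process the optional "remove" list
list_templates()           # print the resulting catalog grouped by category
```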
scripts/tier_maker.py ADDED
@@ -0,0 +1,86 @@
1
+ from src.utils.model_registry import MODEL_COSTS, MODEL_TIERS
2
+
3
+ # Divide models into 4 tiers based on combined input+output cost per 1k tokens
4
+ # tier 1: < $0.0005
5
+ # tier 2: >= $0.0005 and < $0.001
6
+ # tier 3: >= $0.001 and < $0.05
7
+ # tier 4: >= $0.05
8
+
9
+ TIERS_COST = {
10
+ "tier1": 0.0005,
11
+ "tier2": 0.001,
12
+ "tier3": 0.05,
13
+ "tier4": 0.1
14
+ }
15
+
16
+ def get_tier(model_name):
+ # Note: despite the name, this returns the model's cost entry from MODEL_COSTS, not a tier label
17
+ for provider, models in MODEL_COSTS.items():
18
+ for model, cost in models.items():
19
+ if model == model_name:
20
+ return cost
21
+ return None
22
+
23
+ def get_tier_1():
24
+ tier_1 = []
25
+ for provider, models in MODEL_COSTS.items():
26
+ for model, cost in models.items():
27
+ if cost["input"] + cost["output"] < TIERS_COST["tier1"]:
28
+ tier_1.append(model)
29
+ return tier_1
30
+
31
+ def get_tier_2():
32
+ tier_2 = []
33
+ for provider, models in MODEL_COSTS.items():
34
+ for model, cost in models.items():
35
+ if cost["input"] + cost["output"] >= TIERS_COST["tier1"] and cost["input"] + cost["output"] < TIERS_COST["tier2"]:
36
+ tier_2.append(model)
37
+ return tier_2
38
+
39
+ def get_tier_3():
40
+ tier_3 = []
41
+ for provider, models in MODEL_COSTS.items():
42
+ for model, cost in models.items():
43
+ if cost["input"] + cost["output"] >= TIERS_COST["tier2"] and cost["input"] + cost["output"] < TIERS_COST["tier3"]:
44
+ tier_3.append(model)
45
+ return tier_3
46
+
47
+ def get_tier_4():
48
+ tier_4 = []
49
+ for provider, models in MODEL_COSTS.items():
50
+ for model, cost in models.items():
51
+ if cost["input"] + cost["output"] >= TIERS_COST["tier3"]:
52
+ tier_4.append(model)
53
+ return tier_4
54
+
55
+ # Print current tier definitions from registry
56
+ import json
57
+ print("Current tier definitions from registry:")
58
+ print(json.dumps(MODEL_TIERS, indent=4))
59
+ print("\n")
60
+
61
+ # Generate new tier assignments based on cost
62
+ model_tiers = {
63
+ "tier1": {
64
+ "name": "Basic",
65
+ "credits": 1,
66
+ "models": get_tier_1()
67
+ },
68
+ "tier2": {
69
+ "name": "Standard",
70
+ "credits": 3,
71
+ "models": get_tier_2()
72
+ },
73
+ "tier3": {
74
+ "name": "Premium",
75
+ "credits": 5,
76
+ "models": get_tier_3()
77
+ },
78
+ "tier4": {
79
+ "name": "Premium Plus",
80
+ "credits": 10,
81
+ "models": get_tier_4()
82
+ }
83
+ }
84
+
85
+ print("Suggested tier definitions based on cost:")
86
+ print(json.dumps(model_tiers, indent=4))
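
For clarity, here is the bucketing rule from the four `get_tier_*` functions above condensed into one hypothetical helper; the costs in the asserts are made up and exist only to exercise each boundary:

```python
# Condensed restatement of the tier thresholds used above (per 1k tokens, input + output).
TIER_THRESHOLDS = {"tier1": 0.0005, "tier2": 0.001, "tier3": 0.05}

def tier_for(cost_per_1k: float) -> str:
    if cost_per_1k < TIER_THRESHOLDS["tier1"]:
        return "tier1"
    if cost_per_1k < TIER_THRESHOLDS["tier2"]:
        return "tier2"
    if cost_per_1k < TIER_THRESHOLDS["tier3"]:
        return "tier3"
    return "tier4"

assert tier_for(0.0003) == "tier1"
assert tier_for(0.0008) == "tier2"
assert tier_for(0.0200) == "tier3"
assert tier_for(0.0900) == "tier4"
```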
src/__init__.py ADDED
File without changes
src/agents/agents.py ADDED
The diff for this file is too large to render. See raw diff
 
src/agents/deep_agents.py ADDED
@@ -0,0 +1,1085 @@
1
+ import asyncio
2
+ import ast
3
+ import json
4
+ import os
5
+ import dspy
6
+ import numpy as np
7
+ import pandas as pd
8
+ from dotenv import load_dotenv
9
+ from src.utils.logger import Logger
10
+ import logging
11
+ import datetime
12
+ import re
13
+ import textwrap
14
+
15
+ def clean_print_statements(code_block):
16
+ """
17
+ This function cleans up any `print()` statements that might contain unwanted `\n` characters.
18
+ It ensures print statements are properly formatted without unnecessary newlines.
19
+ """
20
+ # This regex targets print statements, even if they have newlines inside
21
+ return re.sub(r'print\((.*?)(\\n.*?)(.*?)\)', r'print(\1\3)', code_block, flags=re.DOTALL)
22
+
23
+
24
+ def clean_unicode_chars(text):
25
+ """
26
+ Clean Unicode characters that might cause encoding issues.
27
+ Replaces common Unicode characters with ASCII equivalents.
28
+ """
29
+ if not isinstance(text, str):
30
+ return text
31
+
32
+ # Replace common Unicode characters with ASCII equivalents
33
+ replacements = {
34
+ '\u2192': ' -> ', # Right arrow
35
+ '\u2190': ' <- ', # Left arrow
36
+ '\u2194': ' <-> ', # Left-right arrow
37
+ '\u2500': '-', # Box drawing horizontal
38
+ '\u2502': '|', # Box drawing vertical
39
+ '\u2026': '...', # Ellipsis
40
+ '\u2013': '-', # En dash
41
+ '\u2014': '-', # Em dash
42
+ '\u201c': '"', # Left double quotation mark
43
+ '\u201d': '"', # Right double quotation mark
44
+ '\u2018': "'", # Left single quotation mark
45
+ '\u2019': "'", # Right single quotation mark
46
+ }
47
+
48
+ for unicode_char, ascii_replacement in replacements.items():
49
+ text = text.replace(unicode_char, ascii_replacement)
50
+
51
+ # Remove any remaining non-ASCII characters
52
+ text = text.encode('ascii', 'ignore').decode('ascii')
53
+
54
+ return text
55
+
56
+
57
+ def remove_main_block(code):
58
+ # Match the __main__ block
59
+ pattern = r'(?m)^if\s+__name__\s*==\s*["\']__main__["\']\s*:\s*\n((?:\s+.*\n?)*)'
60
+
61
+ match = re.search(pattern, code)
62
+ if match:
63
+ main_block = match.group(1)
64
+
65
+ # Dedent the code block inside __main__
66
+ dedented_block = textwrap.dedent(main_block)
67
+
68
+ # Remove \n from any print statements in the block (also handling multiline print cases)
69
+ dedented_block = clean_print_statements(dedented_block)
70
+ # Replace the block in the code
71
+ cleaned_code = re.sub(pattern, dedented_block, code)
72
+
73
+ # Optional: Remove leading newlines if any
74
+ cleaned_code = cleaned_code.strip()
75
+
76
+ return cleaned_code
77
+ return code
78
+
79
+
80
+ # Configure Plotly to prevent auto-display
81
+ def configure_plotly_no_display():
82
+ """Configure Plotly to prevent automatic browser display"""
83
+ try:
84
+ import plotly.io as pio
85
+
86
+ # Set environment variables to prevent browser opening
87
+ os.environ['BROWSER'] = ''
88
+ os.environ['PLOTLY_RENDERER'] = 'json'
89
+
90
+ # Configure Plotly renderers
91
+ pio.renderers.default = 'json'
92
+ pio.templates.default = 'plotly_white'
93
+
94
+ # Disable Kaleido auto-display if available
95
+ try:
96
+ import plotly.graph_objects as go
97
+ # Configure figure defaults to not auto-display
98
+ go.Figure.show = lambda self, *args, **kwargs: None
99
+ except ImportError:
100
+ pass
101
+
102
+ except ImportError:
103
+ print("Warning: Plotly not available for configuration")
104
+
105
+ # Call the configuration function immediately
106
+ configure_plotly_no_display()
107
+
108
+ logger = Logger("deep_agents", see_time=True, console_log=False)
109
+ load_dotenv()
110
+
111
+ class deep_questions(dspy.Signature):
112
+ """
113
+ You are a data analysis assistant.
114
+
115
+ Your role is to take a user's high-level analytical goal and generate a set of deep, targeted follow-up questions. These questions should guide an analyst toward a more thorough understanding of the goal by encouraging exploration, segmentation, and causal reasoning.
116
+
117
+ Instructions:
118
+ - Generate up to 5 insightful, data-relevant questions.
119
+ - Use the dataset structure to tailor your questions (e.g., look at the available columns, data types, and what kind of information they can reveal).
120
+ - The questions should help the user decompose their analytic goal and explore it from multiple angles (e.g., time trends, customer segments, usage behavior, external factors, feedback).
121
+ - Each question should be specific enough to guide actionable analysis or investigation.
122
+ - Use a clear and concise style, but maintain depth.
123
+
124
+ Inputs:
125
+ - goal: The user's analytical goal or main question they want to explore
126
+ - dataset_info: A description of the dataset the user is querying, including:
127
+ - What the dataset represents
128
+ - Key columns and their data types
129
+
130
+ Output:
131
+ - deep_questions: A list of up to 5 specific, data-driven questions that support the analytic goal
132
+
133
+ ---
134
+
135
+ Example:
136
+
137
+ Analytical Goal:
138
+ Understand why churn has been rising
139
+
140
+ Dataset Info:
141
+ Customer Retention Dataset tracking subscription activity over time.
142
+ Columns:
143
+ - customer_id (string)
144
+ - join_date (date)
145
+ - churn_date (date, nullable)
146
+ - is_churned (boolean)
147
+ - plan_type (string: 'basic', 'premium', 'enterprise')
148
+ - region (string)
149
+ - last_login_date (date)
150
+ - avg_weekly_logins (float)
151
+ - support_tickets_last_30d (int)
152
+ - satisfaction_score (float, 0–10 scale)
153
+
154
+ Decomposed Questions:
155
+ 1. How has the churn rate changed month-over-month, and during which periods was the increase most pronounced?
156
+ 2. Are specific plan types or regions showing a higher churn rate relative to others?
157
+ 3. What is the average satisfaction score and support ticket count among churned users compared to retained users?
158
+ 4. Do churned users exhibit different login behavior (e.g., avg_weekly_logins) in the weeks leading up to their churn date?
159
+ 5. What is the tenure distribution (time from join_date to churn_date) among churned customers, and are short-tenure users more likely to churn?
160
+
161
+ """
162
+ goal = dspy.InputField(desc="User analytical goal — what main insight or question they want to answer")
163
+ dataset_info = dspy.InputField(desc="A description of the dataset: what it represents, and the main columns with data types")
164
+ deep_questions = dspy.OutputField(desc="A list of up to five questions that help deeply explore the analytical goal using the dataset")
165
+
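
A sketch of wiring the signature above into a DSPy predictor; it assumes an LM has already been configured via `dspy.settings`, and the goal/dataset strings are hypothetical:

```python
# Assumes dspy.settings.configure(lm=...) has run elsewhere in the app.
question_gen = dspy.ChainOfThought(deep_questions)

result = question_gen(
    goal="Understand why churn has been rising",
    dataset_info="Subscription data: customer_id, join_date, churn_date, plan_type, region, avg_weekly_logins",
)
print(result.deep_questions)  # up to five targeted follow-up questions
```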
166
+ class deep_synthesizer(dspy.Signature):
167
+ """
168
+ You are a data analysis synthesis expert.
169
+
170
+ Your job is to take the outputs from a multi-agent data analytics system - including the original user query, the code summaries from each agent, and the actual printed results from running those code blocks - and synthesize them into a comprehensive, well-structured final report.
171
+
172
+ This report should:
173
+ - Explain what steps were taken and why (based on the query)
174
+ - Summarize the code logic used by each agent, without including raw code
175
+ - Highlight key findings and results from the code outputs
176
+ - Offer clear, actionable insights tied back to the user's original question
177
+ - Be structured, readable, and suitable for decision-makers or analysts
178
+
179
+ Instructions:
180
+ - Begin with a brief restatement of the original query and what it aimed to solve
181
+ - Organize your report step-by-step or by analytical theme (e.g., segmentation, trend analysis, etc.)
182
+ - For each part, summarize what was analyzed, how (based on code summaries), and what the result was (based on printed output)
183
+ - End with a final set of synthesized conclusions and potential next steps or recommendations
184
+
185
+ Inputs:
186
+ - query: The user's original analytical question or goal
187
+ - summaries: A list of natural language descriptions of what each agent's code did
188
+ - print_outputs: A list of printed outputs (results) from running each agent's code
189
+
190
+ Output:
191
+ - synthesized_report: A structured and readable report that ties all parts together, grounded in the code logic and results
192
+
193
+ Example use:
194
+ You are not just summarizing outputs - you're telling a story that answers the user's query using real data.
195
+ """
196
+
197
+ query = dspy.InputField(desc="The original user query or analytical goal")
198
+ summaries = dspy.InputField(desc="List of code summaries - each describing what a particular agent's code did")
199
+ print_outputs = dspy.InputField(desc="List of print outputs - the actual data insights generated by the code")
200
+ synthesized_report = dspy.OutputField(desc="The final, structured report that synthesizes all the information into clear insights")
201
+
202
+ def clean_and_store_code(code, session_df=None):
203
+ """
204
+ Cleans and stores code execution results in a standardized format.
205
+
206
+ Args:
207
+ code (str): Raw code text to execute
208
+ session_df (DataFrame): Optional session DataFrame
209
+
210
+ Returns:
211
+ dict: Execution results containing printed_output, plotly_figs, and error info
212
+ """
213
+ import io
214
+ import sys
215
+ import re
216
+ import plotly.express as px
217
+ import plotly.graph_objects as go
218
+ from plotly.subplots import make_subplots
219
+ import plotly.io as pio
220
+
221
+ # Make session DataFrame available globally if provided
222
+ if session_df is not None:
223
+ globals()['df'] = session_df
224
+
225
+ # Initialize output containers
226
+ output_dict = {
227
+ 'exec_result': None,
228
+ 'printed_output': '',
229
+ 'plotly_figs': [],
230
+ 'error': None
231
+ }
232
+
233
+ try:
234
+ # Clean the code
235
+ cleaned_code = code.strip()
236
+
237
+ cleaned_code = cleaned_code.replace('```python', '').replace('```', '')
238
+
239
+
240
+ # Fix try statement syntax
241
+ cleaned_code = cleaned_code.replace('try\n', 'try:\n')
242
+
243
+ # Remove code patterns that would make the code unrunnable
244
+ invalid_patterns = [
245
+ '```', # Code block markers
246
+ '\\n', # Raw newlines
247
+ '\\t', # Raw tabs
248
+ '\\r', # Raw carriage returns
249
+ ]
250
+
251
+ for pattern in invalid_patterns:
252
+ if pattern in cleaned_code:
253
+ cleaned_code = cleaned_code.replace(pattern, '')
254
+
255
+
256
+ # Remove reading the csv file if it's already in the context
257
+ cleaned_code = re.sub(r"df\s*=\s*pd\.read_csv\([\"\'].*?[\"\']\).*?(\n|$)", '', cleaned_code)
258
+
259
+ # Only match assignments at top level (not indented)
260
+ # 1. Remove 'df = pd.DataFrame()' if it's at the top level
261
+ cleaned_code = re.sub(
262
+ r"^df\s*=\s*pd\.DataFrame\(\s*\)\s*(#.*)?$",
263
+ '',
264
+ cleaned_code,
265
+ flags=re.MULTILINE
266
+ )
267
+ cleaned_code = re.sub(r"plt\.show\(\).*?(\n|$)", '', cleaned_code)
268
+ # Remove all .show() method calls more comprehensively
269
+ cleaned_code = re.sub(r'\b\w*\.show\(\)', '', cleaned_code)
270
+ cleaned_code = re.sub(r'^\s*\w*fig\w*\.show\(\)\s*;?\s*$', '', cleaned_code, flags=re.MULTILINE)
271
+
272
+ # Additional patterns to catch more .show() variations
273
+ cleaned_code = re.sub(r'\.show\(\s*\)', '', cleaned_code) # .show() with optional spaces
274
+ cleaned_code = re.sub(r'\.show\(\s*renderer\s*=\s*[\'"][^\'\"]*[\'"]\s*\)', '', cleaned_code) # .show(renderer='...')
275
+ cleaned_code = re.sub(r'plotly_figs\[\d+\]\.show\(\)', '', cleaned_code) # plotly_figs[0].show()
276
+
277
+ # More comprehensive patterns
278
+ cleaned_code = re.sub(r'\.show\([^)]*\)', '', cleaned_code) # .show(any_args)
279
+ cleaned_code = re.sub(r'fig\w*\.show\(\s*[^)]*\s*\)', '', cleaned_code) # fig*.show(any_args)
280
+ cleaned_code = re.sub(r'\w+_fig\w*\.show\(\s*[^)]*\s*\)', '', cleaned_code) # *_fig*.show(any_args)
281
+
282
+ cleaned_code = remove_main_block(cleaned_code)
283
+
284
+ # Clean Unicode characters that might cause encoding issues
285
+ cleaned_code = clean_unicode_chars(cleaned_code)
286
+
287
+ # Capture printed output
288
+ old_stdout = sys.stdout
289
+ captured_output = io.StringIO()
290
+ sys.stdout = captured_output
291
+
292
+ # Create execution environment with common imports and session data
293
+ exec_globals = {
294
+ '__builtins__': __builtins__,
295
+ 'pd': __import__('pandas'),
296
+ 'np': __import__('numpy'),
297
+ 'px': px,
298
+ 'go': go,
299
+ 'make_subplots': make_subplots,
300
+ 'plotly_figs': [],
301
+ 'print': print,
302
+ }
303
+
304
+ # Add session DataFrame if available
305
+ if session_df is not None:
306
+ exec_globals['df'] = session_df
307
+ elif 'df' in globals():
308
+ exec_globals['df'] = globals()['df']
309
+
310
+ # Add other common libraries that might be needed
311
+ try:
312
+ exec_globals['sm'] = __import__('statsmodels.api', fromlist=[''])
313
+ exec_globals['train_test_split'] = __import__('sklearn.model_selection', fromlist=['train_test_split']).train_test_split
314
+ exec_globals['LinearRegression'] = __import__('sklearn.linear_model', fromlist=['LinearRegression']).LinearRegression
315
+ exec_globals['mean_absolute_error'] = __import__('sklearn.metrics', fromlist=['mean_absolute_error']).mean_absolute_error
316
+ exec_globals['r2_score'] = __import__('sklearn.metrics', fromlist=['r2_score']).r2_score
317
+ exec_globals['LabelEncoder'] = __import__('sklearn.preprocessing', fromlist=['LabelEncoder']).LabelEncoder
318
+ exec_globals['warnings'] = __import__('warnings')
319
+ except ImportError as e:
320
+ print(f"Warning: Could not import some optional libraries: {e}")
321
+
322
+ # Execute the code
323
+ exec(cleaned_code, exec_globals)
324
+
325
+ # Restore stdout
326
+ sys.stdout = old_stdout
327
+
328
+ # Get the captured output
329
+ printed_output = captured_output.getvalue()
330
+ output_dict['printed_output'] = printed_output
331
+ # Extract plotly figures from the execution environment
332
+ if 'plotly_figs' in exec_globals:
333
+ plotly_figs = exec_globals['plotly_figs']
334
+ if isinstance(plotly_figs, list):
335
+ output_dict['plotly_figs'] = plotly_figs
336
+ else:
337
+ output_dict['plotly_figs'] = [plotly_figs] if plotly_figs else []
338
+
339
+ # Also check for any figure variables that might have been created
340
+ for var_name, var_value in exec_globals.items():
341
+ if hasattr(var_value, 'to_json') and hasattr(var_value, 'show'):
342
+ # This looks like a Plotly figure
343
+ if var_value not in output_dict['plotly_figs']:
344
+ output_dict['plotly_figs'].append(var_value)
345
+
346
+ except Exception as e:
347
+ # Restore stdout in case of error (it may not have been swapped yet)
348
+ if 'old_stdout' in locals():
+ sys.stdout = old_stdout
349
+ error_msg = str(e)
350
+ output_dict['error'] = error_msg
351
+ output_dict['printed_output'] = f"Error executing code: {error_msg}"
352
+ print(f"Code execution error: {error_msg}")
353
+
354
+ return output_dict
355
+
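+ # Usage sketch (hypothetical snippet; 'df' stands for any pandas DataFrame):
+ # result = clean_and_store_code("print(df.shape)", session_df=df)
+ # result['printed_output']  # captured stdout from the executed snippet
+ # result['plotly_figs']     # list of Plotly figures the snippet created
+ # result['error']           # None on success, otherwise the error message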
356
+ def score_code(args, code):
357
+ """
358
+ Scores a candidate code block (used as the reward_fn for dspy.Refine).
359
+ Cleans and executes the code, safely handling any execution errors.
360
+ Rewards code that runs successfully and produces Plotly figures.
361
+
362
+ Args:
363
+ args: Arguments (unused but required for dspy.Refine)
364
+ code: Code object with combined_code attribute
365
+
366
+ Returns:
367
+ int: Score (0=error, 1=success, 2=success with plots)
368
+ """
369
+
370
+ code_text = code.combined_code
371
+ try:
372
+ # Fix try statement syntax
373
+ code_text = code_text.replace('try\n', 'try:\n')
374
+ code_text = code_text.replace('```python', '').replace('```', '')
375
+
376
+
377
+ # Remove code patterns that would make the code unrunnable
378
+ invalid_patterns = [
379
+ '```', '\\n', '\\t', '\\r'
380
+ ]
381
+
382
+ for pattern in invalid_patterns:
383
+ if pattern in code_text:
384
+ code_text = code_text.replace(pattern, '')
385
+
386
+ cleaned_code = re.sub(r"plt\.show\(\).*?(\n|$)", '', code_text)
387
+ # Remove all .show() method calls more comprehensively
388
+ cleaned_code = re.sub(r'\b\w*\.show\(\)', '', cleaned_code)
389
+ cleaned_code = re.sub(r'^\s*\w*fig\w*\.show\(\)\s*;?\s*$', '', cleaned_code, flags=re.MULTILINE)
390
+
391
+ # Additional patterns to catch more .show() variations
392
+ cleaned_code = re.sub(r'\.show\(\s*\)', '', cleaned_code) # .show() with optional spaces
393
+ cleaned_code = re.sub(r'\.show\(\s*renderer\s*=\s*[\'"][^\'\"]*[\'"]\s*\)', '', cleaned_code) # .show(renderer='...')
394
+ cleaned_code = re.sub(r'plotly_figs\[\d+\]\.show\(\)', '', cleaned_code) # plotly_figs[0].show()
395
+
396
+ # More comprehensive patterns
397
+ cleaned_code = re.sub(r'\.show\([^)]*\)', '', cleaned_code) # .show(any_args)
398
+ cleaned_code = re.sub(r'fig\w*\.show\(\s*[^)]*\s*\)', '', cleaned_code) # fig*.show(any_args)
399
+ cleaned_code = re.sub(r'\w+_fig\w*\.show\(\s*[^)]*\s*\)', '', cleaned_code) # *_fig*.show(any_args)
400
+
401
+ cleaned_code = remove_main_block(cleaned_code)
402
+ # Capture stdout using StringIO
403
+ from io import StringIO
404
+ import sys
405
+ import plotly.graph_objects as go
406
+ stdout_capture = StringIO()
407
+ original_stdout = sys.stdout
408
+ sys.stdout = stdout_capture
409
+
410
+ # Execute code in a new namespace to avoid polluting globals
411
+ local_vars = {}
412
+ exec(cleaned_code, globals(), local_vars)
413
+
414
+ # Capture any plotly figures from local namespace
415
+ plotly_figs = []
416
+ for var_name, var in local_vars.items():
417
+ if isinstance(var, go.Figure):
418
+ if not var.layout.title:
419
+ var.update_layout(title=f"Figure {len(plotly_figs) + 1}")
420
+ if not var.layout.template:
421
+ var.update_layout(template="plotly_white")
422
+ plotly_figs.append(var)
423
+ elif isinstance(var, (list, tuple)):
424
+ for item in var:
425
+ if isinstance(item, go.Figure):
426
+ if not item.layout.title:
427
+ item.update_layout(title=f"Figure {len(plotly_figs) + 1}")
428
+ if not item.layout.template:
429
+ item.update_layout(template="plotly_white")
430
+ plotly_figs.append(item)
431
+
432
+ # Restore stdout and get captured output
433
+ sys.stdout = original_stdout
434
+ captured_output = stdout_capture.getvalue()
435
+ stdout_capture.close()
436
+
437
+ # Calculate score based on execution and plot generation
438
+ score = 2 if plotly_figs else 1
439
+
440
+ return score
441
+
442
+ except Exception as e:
443
+ # Restore stdout in case of error
444
+ if 'stdout_capture' in locals():
445
+ sys.stdout = original_stdout
446
+ stdout_capture.close()
447
+
448
+ return 0
449
+
450
+
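+ # Reward wiring sketch: score_code serves as the reward_fn for dspy.Refine further
+ # below, where a score of 2 (figures produced) clears the 1.0 threshold:
+ # refiner = dspy.Refine(module=dspy.Predict(deep_code_synthesizer),
+ #                       N=5, reward_fn=score_code, threshold=1.0, fail_count=10)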
451
+ class deep_planner(dspy.Signature):
452
+ """
453
+ You are an advanced multi-question planning agent. Your task is to generate the most optimized and minimal plan
454
+ to answer up to 5 analytical questions using available agents.
455
+
456
+ Your responsibilities:
457
+ 1. Feasibility: Verify that the goal is achievable using the provided datasets and agent descriptions.
458
+ 2. Optimization:
459
+ - Batch up to 2 similar questions per agent call.
460
+ - Reuse outputs across questions wherever possible.
461
+ - Avoid unnecessary agents or redundant processing.
462
+ - Minimize total agent calls while preserving correctness.
463
+ 3. Clarity:
464
+ - Define clear variable usage (create/use).
465
+ - Specify concise step-by-step instructions per agent.
466
+ - Use dependency arrows (->) to indicate required agent outputs used by others.
467
+
468
+ Inputs:
469
+ - deep_questions: A list of up to 5 deep analytical questions (e.g., ["q1", "q2", ..., "q5"])
470
+ - dataset: The available dataset(s) in memory or context
471
+ - agents_desc: Dictionary containing each agent's name and its capabilities or descriptions
472
+
473
+ Outputs:
474
+ - plan_instructions: Detailed per-agent variable flow and functionality in the format:
475
+ {
476
+ "agent_x": {
477
+ "create": ["cleaned_data: DataFrame - cleaned version of the input dataset"],
478
+ "use": ["df: DataFrame - raw input dataset"],
479
+ "instruction": "Clean the dataset by handling null values and standardizing formats."
480
+ },
481
+ "agent_y": {
482
+ "create": ["analysis_results: dict - results of correlation analysis"],
483
+ "use": ["cleaned_data: DataFrame - output from @agent_x"],
484
+ "instruction": "Perform correlation analysis to identify strong predictors."
485
+ }
486
+ }
487
+
488
+ Output Goal:
489
+ Generate a small, clean, optimized execution plan using minimal agent calls, reusable outputs, and well-structured dependencies.
490
+ USE THE EXACT NAMES OF THE AGENTS IN THE INSTRUCTIONS
491
+ """
492
+
493
+ deep_questions = dspy.InputField(desc="List of up to 5 deep analytical questions to answer")
494
+ dataset = dspy.InputField(desc="Available datasets, use 'df' as the working dataset")
495
+ agents_desc = dspy.InputField(desc="Descriptions of available agents and their functions")
496
+ plan_instructions = dspy.OutputField(desc="Variable-level instructions for each agent used in the plan")
497
+
498
+ class deep_plan_fixer(dspy.Signature):
499
+ """
500
+ You are a plan instruction fixer agent. Your task is to take potentially malformed plan instructions
501
+ and convert them into a properly structured dictionary format that can be safely evaluated.
502
+
503
+ Your responsibilities:
504
+ 1. Parse and validate the input plan instructions
505
+ 2. Convert the instructions into a proper dictionary format
506
+ 3. Ensure all agent instructions follow the required structure:
507
+ {
508
+ "@agent_name": {
509
+ "create": ["variable: type - description"],
510
+ "use": ["variable: type - description"],
511
+ "instruction": "clear instruction text"
512
+ }
513
+ }
514
+ 4. Handle any malformed or missing components
515
+ 5. Return a properly formatted dictionary string that can be safely evaluated
516
+
517
+ Inputs:
518
+ - plan_instructions: The potentially malformed plan instructions to fix
519
+
520
+ Outputs:
521
+ - fixed_plan: A properly formatted dictionary string that can be safely evaluated
522
+ """
523
+
524
+ plan_instructions = dspy.InputField(desc="The potentially malformed plan instructions to fix")
525
+ fixed_plan = dspy.OutputField(desc="Properly formatted dictionary string that can be safely evaluated")
526
+
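+ # Parsing sketch: the fixer's output is a dict literal in string form, so callers
+ # recover it with ast.literal_eval, falling back to json.loads (as the module below does):
+ # try:
+ #     plan = ast.literal_eval(fixed_plan)
+ # except (ValueError, SyntaxError):
+ #     plan = json.loads(fixed_plan)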
527
+ class final_conclusion(dspy.Signature):
528
+ """
529
+ You are a high-level analytics reasoning engine.
530
+
531
+ Your task is to take multiple synthesized analytical results (each answering part of the original query) and produce a cohesive final conclusion that directly addresses the user's original question.
532
+
533
+ This is not just a summary — it's a judgment. Use evidence from the synthesized findings to:
534
+ - Answer the original question with clarity
535
+ - Highlight the most important insights
536
+ - Offer any causal reasoning or patterns discovered
537
+ - Suggest next steps or strategic recommendations where appropriate
538
+
539
+ Instructions:
540
+ - Focus on relevance to the original query
541
+ - Do not just repeat what the synthesized sections say — instead, infer, interpret, and connect dots
542
+ - Prioritize clarity and insight over detail
543
+ - End with a brief "Next Steps" section if applicable
544
+
545
+ Inputs:
546
+ - query: The original user question or goal
547
+ - synthesized_sections: A list of synthesized result sections from the deep_synthesizer step (each covering part of the analysis)
548
+
549
+ Output:
550
+ - final_summary: A cohesive final conclusion that addresses the query, draws insight, and offers high-level guidance
551
+
552
+ ---
553
+
554
+ Example Output Structure:
555
+
556
+ **Conclusion**
557
+ Summarize the overall answer to the user's question, using the most compelling evidence across the synthesized sections.
558
+
559
+ **Key Takeaways**
560
+ - Bullet 1
561
+ - Bullet 2
562
+ - Bullet 3
563
+
564
+ **Recommended Next Steps**
565
+ (Optional based on context)
566
+
567
+ """
568
+
569
+ query = dspy.InputField(desc="The user's original query or analytical goal")
570
+ synthesized_sections = dspy.InputField(desc="List of synthesized outputs — each one corresponding to a sub-part of the analysis")
571
+ final_conclusion = dspy.OutputField(desc="A cohesive, conclusive answer that addresses the query and integrates key insights")
572
+
573
+
574
+
575
+
576
+ class deep_code_synthesizer(dspy.Signature):
577
+ """
578
+ You are a code synthesis and optimization engine that combines and fixes code from multiple analytical agents.
579
+
580
+ Your task is to take code outputs from preprocessing, statistical analysis, machine learning, and visualization agents, then:
581
+ - Combine them into a single, coherent analysis pipeline
582
+ - Fix any errors or inconsistencies between agent outputs
583
+ - Ensure proper data flow between steps
584
+ - Optimize the combined code for efficiency
585
+ - Add necessary imports and dependencies
586
+ - Handle any data type mismatches or conversion issues
587
+ - Validate and normalize data types between agent outputs (e.g., ensure DataFrame operations maintain DataFrame type)
588
+ - Convert between common data structures (lists, dicts, DataFrames) as needed
589
+ - Add type hints and validation checks
590
+ - Ensure consistent variable naming across agents
591
+ - Ensure all visualizations use Plotly exclusively
592
+ - Create comprehensive visualizations that show all important variables and relationships
593
+ - Store all Plotly figures in a list for later use in the report
594
+
595
+ Instructions:
596
+ - Review each agent's code for correctness and completeness
597
+ - Ensure variables are properly passed between steps with consistent types
598
+ - Fix any syntax errors or logical issues
599
+ - Add error handling and type validation where needed
600
+ - Optimize code structure and performance
601
+ - Maintain consistent coding style
602
+ - Add clear comments explaining the analysis flow
603
+ - Add data type conversion functions where needed
604
+ - Validate input/output types between agent steps
605
+ - Handle edge cases where agents might return different data structures
606
+ - Convert any non-Plotly visualizations to Plotly format
607
+ - Ensure all important variables are visualized appropriately
608
+ - Store all Plotly figures in a list called plotly_figs
609
+ - Include appropriate titles, labels, and legends for all visualizations
610
+ - Use consistent styling across all Plotly visualizations
611
+ - DO NOT COMMENT OUT ANYTHING, AS THE CODE SHOULD RUN & SHOW OUTPUTS
612
+ - THE DATASET IS ALREADY LOADED, DON'T CREATE FAKE DATA. 'df' is always loaded
613
+
614
+ Inputs:
615
+ - deep_questions: The five deep questions this system is answering
616
+ - dataset_info: Information about the dataset structure and types
617
+ - planner_instructions: The plan according to the planner; ensure the final code makes everything coherent
618
+ - code: List of all agent code
619
+
620
+
621
+ Output:
622
+ - combined_code: A single, optimized Python script that combines all analysis steps with proper type handling and Plotly visualizations
623
+
624
+ """
625
+ deep_questions = dspy.InputField(desc="The five deep questions this system is answering")
626
+ dataset_info = dspy.InputField(desc="Information about the dataset")
627
+ planner_instructions = dspy.InputField(desc="The planner instructions for each agent")
628
+ code = dspy.InputField(desc="The code generated by all agents")
629
+ combined_code = dspy.OutputField(desc="A single, optimized Python script that combines all analysis steps")
630
+
631
+ class deep_code_fix(dspy.Signature):
632
+ """
633
+ You are a code debugging and fixing agent that analyzes and repairs code errors.
634
+
635
+ Your task is to:
636
+ - Analyze error messages and identify root causes
637
+ - Fix syntax errors, logical issues, and runtime problems
638
+ - Ensure proper data type handling and conversions
639
+ - Add appropriate error handling and validation
640
+ - Maintain code style and documentation
641
+ - Preserve the original analysis intent
642
+
643
+ Instructions:
644
+ - Carefully analyze the error message and stack trace
645
+ - Identify the specific line(s) causing the error
646
+ - Determine if the issue is syntax, logic, or runtime related
647
+ - Fix the code while maintaining its original purpose
648
+ - Add appropriate error handling if needed
649
+ - Ensure the fix doesn't introduce new issues
650
+ - Document the changes made
651
+
652
+ Inputs:
653
+ - code: The code that generated the error
654
+ - error: The error message and stack trace
655
+
656
+ Output:
657
+ - fixed_code: The repaired code with error handling
658
+ - fix_explanation: Explanation of what was fixed and why
659
+ """
660
+ code = dspy.InputField(desc="The code that generated the error")
661
+ error = dspy.InputField(desc="The error message and stack trace")
662
+ fixed_code = dspy.OutputField(desc="The repaired code with error handling")
663
+ fix_explanation = dspy.OutputField(desc="Explanation of what was fixed and why")
664
+
665
+
666
+ chart_instructions = """
667
+ Chart Styling Guidelines:
668
+
669
+ 1. General Styling:
670
+ - Use a clean, professional color palette (e.g., Tableau, ColorBrewer)
671
+ - Include clear titles and axis labels
672
+ - Add appropriate legends
673
+ - Use consistent font sizes and styles
674
+ - Include grid lines where helpful
675
+ - Add hover information for interactive plots
676
+
677
+ 2. Specific Chart Types:
678
+ - Bar Charts:
679
+ * Use horizontal bars for many categories
680
+ * Sort bars by value when appropriate
681
+ * Use consistent bar widths
682
+ * Add value labels on bars
683
+
684
+ - Line Charts:
685
+ * Use distinct line styles/colors
686
+ * Add markers at data points
687
+ * Include trend lines when relevant
688
+ * Show confidence intervals if applicable
689
+
690
+ - Scatter Plots:
691
+ * Use appropriate marker sizes
692
+ * Add regression lines when needed
693
+ * Use color to show additional dimensions
694
+ * Include density contours for large datasets
695
+
696
+ - Heatmaps:
697
+ * Use diverging color schemes for correlation
698
+ * Include value annotations
699
+ * Sort rows/columns by similarity
700
+ * Add clear color scale legend
701
+
702
+ 3. Data Visualization Best Practices:
703
+ - Start axes at zero when appropriate
704
+ - Use log scales for wide-ranging data
705
+ - Include reference lines/benchmarks
706
+ - Add annotations for important points
707
+ - Show uncertainty where relevant
708
+ - Use consistent color encoding
709
+ - Include data source and timestamp
710
+ - Add clear figure captions
711
+
712
+ 4. Interactive Features:
713
+ - Enable zooming and panning
714
+ - Add tooltips with detailed information
715
+ - Include download options
716
+ - Allow toggling of data series
717
+ - Enable cross-filtering between charts
718
+
719
+ 5. Accessibility:
720
+ - Use colorblind-friendly palettes
721
+ - Include alt text for all visualizations
722
+ - Ensure sufficient contrast
723
+ - Make interactive elements keyboard accessible
724
+ - Provide text alternatives for key insights
725
+ """
726
+
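+ # Illustrative sketch (hypothetical 'df_summary' DataFrame, not part of the pipeline):
+ # a bar chart applying a few of the guidelines above - clean template, clear title,
+ # value labels on bars, and collection into plotly_figs.
+ # import plotly.express as px
+ # fig = px.bar(df_summary, x="category", y="value", text="value",
+ #              template="plotly_white", title="Revenue by Category")
+ # fig.update_traces(textposition="outside")
+ # plotly_figs = [fig]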
727
+
728
+
729
+
730
+ class deep_analysis_module(dspy.Module):
731
+ def __init__(self, agents, agents_desc):
732
+ # logger.log_message(f"Initializing deep_analysis_module with {agents} agents: {list(agents.keys())}", level=logging.INFO)
733
+
734
+ self.agents = agents
735
+ # Make all dspy operations async using asyncify
736
+ self.deep_questions = dspy.asyncify(dspy.Predict(deep_questions))
737
+ self.deep_planner = dspy.asyncify(dspy.ChainOfThought(deep_planner))
738
+ self.deep_synthesizer = dspy.asyncify(dspy.ChainOfThought(deep_synthesizer))
739
+ # Keep both asyncified and non-asyncified versions for code synthesizer
740
+ self.deep_code_synthesizer_sync = dspy.Predict(deep_code_synthesizer) # For dspy.Refine
741
+ self.deep_code_synthesizer = dspy.asyncify(dspy.Predict(deep_code_synthesizer)) # For async use
742
+ self.deep_plan_fixer = dspy.asyncify(dspy.ChainOfThought(deep_plan_fixer))
743
+ self.deep_code_fixer = dspy.asyncify(dspy.ChainOfThought(deep_code_fix))
744
+ self.styling_instructions = chart_instructions
745
+ self.agents_desc = agents_desc
746
+ self.final_conclusion = dspy.asyncify(dspy.ChainOfThought(final_conclusion))
747
+
748
+ # logger.log_message(f"Deep analysis module initialized successfully with agents: {list(self.agents.keys())}", level=logging.INFO)
749
+
750
+ async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=None):
751
+ """
752
+ Execute deep analysis with streaming progress updates.
753
+ This is an async generator that yields progress updates incrementally.
754
+ """
755
+ # Make the session DataFrame available globally for code execution
756
+ if session_df is not None:
757
+ globals()['df'] = session_df
758
+
759
+ try:
760
+ # Step 1: Generate deep questions (20% progress)
761
+ yield {
762
+ "step": "questions",
763
+ "status": "processing",
764
+ "message": "Generating analytical questions...",
765
+ "progress": 10
766
+ }
767
+
768
+ questions = await self.deep_questions(goal=goal, dataset_info=dataset_info)
769
+ logger.log_message("Questions generated")
770
+
771
+ yield {
772
+ "step": "questions",
773
+ "status": "completed",
774
+ "content": questions.deep_questions,
775
+ "progress": 20
776
+ }
777
+
778
+ # Step 2: Create analysis plan (40% progress)
779
+ yield {
780
+ "step": "planning",
781
+ "status": "processing",
782
+ "message": "Creating analysis plan...",
783
+ "progress": 25
784
+ }
785
+
786
+ question_list = [q.strip() for q in questions.deep_questions.split('\n') if q.strip()]
787
+ deep_plan = await self.deep_planner(
788
+ deep_questions=questions.deep_questions,
789
+ dataset=dataset_info,
790
+ agents_desc=str(self.agents_desc)
791
+ )
792
+ logger.log_message("Plan created")
793
+
794
+ # Parse plan instructions
795
+ try:
796
+ plan_instructions = ast.literal_eval(deep_plan.plan_instructions)
797
+ if not isinstance(plan_instructions, dict):
798
+ plan_instructions = json.loads(deep_plan.plan_instructions)
799
+ keys = [key for key in plan_instructions.keys()]
800
+
801
+ if not all(key in self.agents for key in keys):
802
+ raise ValueError(f"Invalid agent key(s) in plan instructions. Available agents: {list(self.agents.keys())}")
803
+ logger.log_message(f"Plan instructions: {plan_instructions}", logging.INFO)
804
+ logger.log_message(f"Keys: {keys}", logging.INFO)
805
+ except (ValueError, SyntaxError, json.JSONDecodeError) as e:
806
+ try:
807
+ deep_plan = await self.deep_plan_fixer(plan_instructions=deep_plan.plan_instructions)
808
+ plan_instructions = ast.literal_eval(deep_plan.fixed_plan)
809
+ if not isinstance(plan_instructions, dict):
810
+ plan_instructions = json.loads(deep_plan.fixed_plan)
811
+ keys = [key for key in plan_instructions.keys()]
812
+ except (ValueError, SyntaxError, json.JSONDecodeError) as e:
813
+ logger.log_message(f"Error parsing plan instructions: {e}", logging.ERROR)
814
+ raise e
815
+
816
+ logger.log_message("Instructions parsed")
817
+
818
+ yield {
819
+ "step": "planning",
820
+ "status": "completed",
821
+ "content": deep_plan.plan_instructions,
822
+ "progress": 40
823
+ }
824
+
825
+ # Step 3: Execute agent tasks (60% progress)
826
+ yield {
827
+ "step": "agent_execution",
828
+ "status": "processing",
829
+ "message": "Executing analysis agents...",
830
+ "progress": 45
831
+ }
832
+
833
+ queries = [
834
+ dspy.Example(
835
+ goal=questions.deep_questions,
836
+ dataset=dataset_info,
837
+ plan_instructions=str(plan_instructions[key]),
838
+ **({"styling_index": "Sample styling guidelines"} if "data_viz" in key or "viz" in key.lower() or "visual" in key.lower() or "plot" in key.lower() or "chart" in key.lower() else {})
839
+ ).with_inputs(
840
+ "goal",
841
+ "dataset",
842
+ "plan_instructions",
843
+ *(["styling_index"] if "data_viz" in key or "viz" in key.lower() or "visual" in key.lower() or "plot" in key.lower() or "chart" in key.lower() else [])
844
+ )
845
+ for key in keys
846
+ ]
847
+ tasks = [self.agents[key](**q) for q, key in zip(queries, keys)]
848
+
849
+ # Await all tasks to complete
850
+ summaries = []
851
+ codes = []
852
+ logger.log_message("Tasks started")
853
+
854
+ completed_tasks = 0
855
+ for task in asyncio.as_completed(tasks):
856
+ result = await task
857
+ summaries.append(result.summary)
858
+ codes.append(result.code)
859
+ completed_tasks += 1
860
+
861
+ # Update progress for each completed agent
862
+ agent_progress = 45 + (completed_tasks / len(tasks)) * 15 # 45% to 60%
863
+ yield {
864
+ "step": "agent_execution",
865
+ "status": "processing",
866
+ "message": f"Completed {completed_tasks}/{len(tasks)} analysis agents...",
867
+ "progress": int(agent_progress)
868
+ }
869
+ logger.log_message(f"Done with agent {completed_tasks}/{len(tasks)}")
870
+
871
+ yield {
872
+ "step": "agent_execution",
873
+ "status": "completed",
874
+ "message": "All analysis agents completed",
875
+ "progress": 60
876
+ }
877
+
878
+ # Step 4: Code synthesis (80% progress)
879
+ yield {
880
+ "step": "code_synthesis",
881
+ "status": "processing",
882
+ "message": "Analyzing code...",
883
+ "progress": 65
884
+ }
885
+
886
+ # Safely extract code from agent outputs
887
+ code = []
888
+ for c in codes:
889
+ try:
890
+ cleaned_code = remove_main_block(c)
891
+ if "```python" in cleaned_code:
892
+ parts = cleaned_code.split("```python")
893
+ if len(parts) > 1:
894
+ extracted = parts[1].split("```")[0] if "```" in parts[1] else parts[1]
895
+ code.append(extracted.replace('try\n','try:\n'))
896
+ else:
897
+ code.append(cleaned_code.replace('try\n','try:\n'))
898
+ else:
899
+ code.append(cleaned_code.replace('try\n','try:\n'))
900
+ except Exception as e:
901
+ logger.log_message(f"Warning: Error processing code block: {e}", logging.WARNING)
902
+ code.append(c.replace('try\n','try:\n'))
903
+
904
+ # Create deep coder without asyncify to avoid source inspection issues
905
+ deep_coder = dspy.Refine(module=self.deep_code_synthesizer_sync, N=5, reward_fn=score_code, threshold=1.0, fail_count=10)
906
+
907
+ # Check if we have valid API key
908
+ anthropic_key = os.environ.get('ANTHROPIC_API_KEY')
909
+ if not anthropic_key:
910
+ raise ValueError("ANTHROPIC_API_KEY environment variable is not set")
911
+
912
+ try:
913
+ # Create the LM instance that will be used
914
+ thread_lm = dspy.LM("anthropic/claude-sonnet-4-20250514", api_key=anthropic_key, max_tokens=17000)
915
+
916
+ logger.log_message("Starting code generation...")
917
+ start_time = datetime.datetime.now()
918
+ logger.log_message(f"Code generation started at: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
919
+
920
+ # Define the blocking function to run in thread
921
+ def run_deep_coder():
922
+ with dspy.context(lm=thread_lm):
923
+ return deep_coder(
924
+ deep_questions=str(questions.deep_questions),
925
+ dataset_info=dataset_info,
926
+ planner_instructions=str(plan_instructions),
927
+ code=str(code)
928
+ )
929
+
930
+ # Use asyncio.to_thread for better async integration
931
+ deep_code = await asyncio.to_thread(run_deep_coder)
932
+
933
+ logger.log_message(f"Code generation completed at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
934
+ except Exception as e:
935
+ logger.log_message(f"Error during code generation: {str(e)}", logging.ERROR)
936
+ raise e
937
+
938
+ code = deep_code.combined_code
939
+ code = code.replace('```python', '').replace('```', '')
940
+
941
+ # Clean Unicode characters that might cause encoding issues
942
+ code = clean_unicode_chars(code)
943
+
944
+ yield {
945
+ "step": "code_synthesis",
946
+ "status": "completed",
947
+ "message": "Code synthesis completed",
948
+ "progress": 80
949
+ }
950
+
951
+ # Step 5: Execute code (85% progress)
952
+ yield {
953
+ "step": "code_execution",
954
+ "status": "processing",
955
+ "message": "Executing code...",
956
+ "progress": 82
957
+ }
958
+
959
+ # Execute the code with error handling and session DataFrame
960
+ try:
961
+ # Run code execution in thread pool to avoid blocking
962
+ import concurrent.futures
963
+ with concurrent.futures.ThreadPoolExecutor() as executor:
964
+ future = executor.submit(clean_and_store_code, code, session_df)
965
+ output = future.result(timeout=300) # 5 minute timeout
966
+
967
+ logger.log_message(f"Deep Code executed")
968
+
969
+ if output.get('error'):
970
+ logger.log_message(f"Warning: Code execution had errors: {output['error']}", logging.ERROR)
971
+
972
+ print_outputs = [output['printed_output']]
973
+ plotly_figs = [output['plotly_figs']]
974
+
975
+ except Exception as e:
976
+ logger.log_message(f"Error during code execution: {str(e)}", logging.ERROR)
977
+ output = {
978
+ 'exec_result': None,
979
+ 'printed_output': f"Code execution failed: {str(e)}",
980
+ 'plotly_figs': [],
981
+ 'error': str(e)
982
+ }
983
+ print_outputs = [output['printed_output']]
984
+ plotly_figs = [output['plotly_figs']]
985
+
986
+ yield {
987
+ "step": "code_execution",
988
+ "status": "completed",
989
+ "message": "Code execution completed",
990
+ "progress": 85
991
+ }
992
+
993
+ # Step 6: Synthesis (90% progress)
994
+ yield {
995
+ "step": "synthesis",
996
+ "status": "processing",
997
+ "message": "Synthesizing results...",
998
+ "progress": 87
999
+ }
1000
+
1001
+ synthesis = []
1002
+ try:
1003
+ synthesis_result = await self.deep_synthesizer(
1004
+ query=goal,
1005
+ summaries=str(summaries),
1006
+ print_outputs=str(output['printed_output'])
1007
+ )
1008
+ synthesis.append(synthesis_result)
1009
+ except Exception as e:
1010
+ logger.log_message(f"Error during synthesis: {str(e)}", logging.ERROR)
1011
+ synthesis.append(type('obj', (object,), {'synthesized_report': f"Synthesis failed: {str(e)}"})())
1012
+
1013
+ logger.log_message("Synthesis done")
1014
+
1015
+ yield {
1016
+ "step": "synthesis",
1017
+ "status": "completed",
1018
+ "message": "Synthesis completed",
1019
+ "progress": 90
1020
+ }
1021
+
1022
+ # Step 7: Final conclusion (100% progress)
1023
+ yield {
1024
+ "step": "conclusion",
1025
+ "status": "processing",
1026
+ "message": "Generating final conclusion...",
1027
+ "progress": 95
1028
+ }
1029
+
1030
+ try:
1031
+ final_conclusion = await self.final_conclusion(
1032
+ query=goal,
1033
+ synthesized_sections=str([s.synthesized_report for s in synthesis])
1034
+ )
1035
+ except Exception as e:
1036
+ logger.log_message(f"Error during final conclusion: {str(e)}", logging.ERROR)
1037
+ final_conclusion = type('obj', (object,), {'final_conclusion': f"Final conclusion failed: {str(e)}"})()
1038
+
1039
+ logger.log_message("Conclusion Made")
1040
+
1041
+ return_dict = {
1042
+ 'goal': goal,
1043
+ 'deep_questions': questions.deep_questions,
1044
+ 'deep_plan': deep_plan.plan_instructions,
1045
+ 'summaries': summaries,
1046
+ 'code': code,
1047
+ 'plotly_figs': plotly_figs,
1048
+ 'synthesis': [s.synthesized_report for s in synthesis],
1049
+ 'final_conclusion': final_conclusion.final_conclusion
1050
+ }
1051
+
1052
+ yield {
1053
+ "step": "conclusion",
1054
+ "status": "completed",
1055
+ "message": "Analysis completed successfully",
1056
+ "progress": 100,
1057
+ "final_result": return_dict
1058
+ }
1059
+
1060
+ logger.log_message("Return dict created")
1061
+
1062
+ except Exception as e:
1063
+ logger.log_message(f"Error in deep analysis: {str(e)}", logging.ERROR)
1064
+ yield {
1065
+ "step": "error",
1066
+ "status": "failed",
1067
+ "message": f"Deep analysis failed: {str(e)}",
1068
+ "progress": 0,
1069
+ "error": str(e)
1070
+ }
1071
+
1072
+
1073
+ async def execute_deep_analysis(self, goal, dataset_info, session_df=None):
1074
+ """
1075
+ Legacy method for backward compatibility.
1076
+ Executes the streaming analysis and returns the final result.
1077
+ """
1078
+ final_result = None
1079
+ async for update in self.execute_deep_analysis_streaming(goal, dataset_info, session_df):
1080
+ if update.get("step") == "conclusion" and update.get("status") == "completed":
1081
+ final_result = update.get("final_result")
1082
+ elif update.get("step") == "error":
1083
+ raise Exception(update.get("message", "Unknown error"))
1084
+
1085
+ return final_result
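+ # Consumption sketch (hypothetical caller): every yielded update carries
+ # step/status/progress keys, so a route can forward progress to the client.
+ # module = deep_analysis_module(agents, agents_desc)
+ # async for update in module.execute_deep_analysis_streaming(goal, dataset_info, df):
+ #     print(update["step"], update["status"], update.get("progress"))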
src/agents/marketing_analytics_agents.py ADDED
@@ -0,0 +1,75 @@
1
+ import dspy
2
+
3
+ # Contains the DSPy agents for marketing analytics
4
+
5
+ class bidding_strategy_agent(dspy.Signature):
6
+ # Analytics Agent for optimizing bidding strategies
7
+ """You are a bidding strategy analytics agent specialized in marketing analytics.
8
+ Your task is to take marketing campaign data and a user-defined goal, and output Python code that performs
9
+ bidding strategy analysis and optimization.
10
+ You should use libraries like numpy, pandas, and scikit-learn for the analysis.
11
+
12
+ Bidding strategy tasks include:
13
+ - Analyzing historical bid performance
14
+ - Optimizing bid values across channels
15
+ - Forecasting campaign performance
16
+ - A/B testing bid strategies
17
+ - ROI and conversion rate analysis
18
+ - Budget allocation optimization
19
+
20
+ Make sure your output is as intended!
21
+
22
+ You may be given recent agent interactions as a hint, with the first being the latest.
23
+ You are running inside Streamlit; use st.write instead of print.
24
+
25
+ """
26
+ dataset = dspy.InputField(desc="Available datasets loaded in the system; use this df and its columns. Set df as a copy of df.")
27
+ goal = dspy.InputField(desc="The user-defined goal")
28
+ code = dspy.OutputField(desc="The code that performs the bidding strategy analysis")
29
+ commentary = dspy.OutputField(desc="The comments about what bidding strategy analysis is being performed")
30
+
31
+ class marketing_reporting_agent(dspy.Signature):
32
+ # Analytics Agent for generating marketing reports
33
+ """You are a marketing reporting agent specialized in creating data-driven marketing reports.
34
+ Your task is to take marketing data, a user-defined goal, and report instructions to generate
35
+ Python code that creates insightful marketing reports and visualizations.
36
+ You should use libraries like numpy, pandas for the analysis and only plotly for visualization.
37
+
38
+
39
+ Make sure your output matches the report instructions and goal!
40
+
41
+ You are running inside Streamlit; use st.write instead of print.
42
+ Use st.plotly_chart() for interactive plots
43
+ """
44
+ dataset = dspy.InputField(desc="Available datasets loaded in the system; use this df and its columns. Set df as a copy of df.")
45
+ goal = dspy.InputField(desc="The user-defined goal")
46
+ report_instructions = dspy.InputField(desc="Specific instructions for report format, metrics, and visualizations")
47
+ code = dspy.OutputField(desc="The code that generates the marketing report")
48
+
49
+
50
+ class customer_analytics_agent(dspy.Signature):
51
+ # Analytics Agent for customer value and acquisition analysis
52
+ """You are a customer analytics agent specialized in analyzing customer behavior and value.
53
+ Your task is to take customer data and a user-defined goal, and output Python code that performs
54
+ customer lifetime value, acquisition cost, and ROI analysis.
55
+ You should use libraries like numpy, pandas, scikit-learn and lifetimes for the analysis.
56
+
57
+ Customer analytics tasks include:
58
+ - Customer Lifetime Value (CLV/LTV) modeling
59
+ - Customer Acquisition Cost (CAC) analysis
60
+ - Customer segmentation and clustering
61
+ - Churn prediction and prevention
62
+ - Customer journey mapping
63
+ - ROI and retention metrics
64
+ - Purchase behavior analysis
65
+
66
+ Make sure your output is as intended!
67
+
68
+ You may be given recent agent interactions as a hint, with the first being the latest.
69
+ You are running inside Streamlit; use st.write instead of print.
70
+
71
+ """
72
+ dataset = dspy.InputField(desc="Available datasets loaded in the system; use this df and its columns. Set df as a copy of df.")
73
+ goal = dspy.InputField(desc="The user-defined goal")
74
+ code = dspy.OutputField(desc="The code that performs the customer analytics")
75
+ commentary = dspy.OutputField(desc="The comments about what customer analysis is being performed")
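+ # Invocation sketch (hypothetical): like other dspy signatures, these are wrapped
+ # in a predictor before use; field names match the signature above.
+ # agent = dspy.Predict(customer_analytics_agent)
+ # result = agent(dataset="df with columns: customer_id, spend, ...", goal="Estimate CLV")
+ # result.code / result.commentary hold the generated script and its explanation.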
src/agents/memory_agents.py ADDED
@@ -0,0 +1,68 @@
1
+ import dspy
2
+
3
+ class memory_summarize_agent(dspy.Signature):
4
+ """
5
+ You are an AI agent which helps summarize other agent responses and user-input.
6
+ Keep these instructions in mind:
7
+
8
+ - Analyze the provided text.
9
+ - Present the extracted details in bullet points:
10
+ - User Query: The user query/goal summarized, with only important information retained
11
+ - Agent: Include agent name
12
+ - Stack_Used: All python packages used
13
+ - Actions: What actions the agent took; summarize them like "Agent visualized a line chart using plotly"
14
+
15
+ """
16
+ agent_response = dspy.InputField(desc="What the agent output: commentary and code")
17
+ user_goal = dspy.InputField(desc="User query or intended goal")
18
+ summary = dspy.OutputField(desc="The summary generated in the requested format")
19
+
20
+ class error_memory_agent(dspy.Signature):
21
+ """
22
+ Prompt for error_summarize Agent:
23
+
24
+ Agent Name: error_summarize
25
+
26
+ Purpose: To generate a concise summary of an error in Python code and provide a clear correction, along with relevant metadata and user query information. This summary will help in understanding the error and applying the correction.
27
+
28
+ Input Data:
29
+
30
+ Incorrect Python Code: (A snippet of code that produced an error)
31
+ Meta Data:
32
+ Agent Name: (Name of the agent that processed the code)
33
+ Agent Version: (Version of the agent that processed the code)
34
+ Timestamp: (When the error occurred)
35
+ User Query: (The query or task that led to the incorrect code execution)
36
+ Human-Defined Correction: (The corrected code or solution provided by a human expert)
37
+ Processing Instructions:
38
+
39
+ Error Analysis:
40
+
41
+ Analyze the incorrect Python code to determine the type of error and its cause.
42
+ Summary Creation:
43
+
44
+ Generate a brief summary of the error, highlighting the key issue in the code.
45
+ Provide a short explanation of the correction that resolves the issue.
46
+ Output Formatting:
47
+
48
+ Format the summary to include:
49
+ Error Summary: A concise description of the error.
50
+ Correction: A brief explanation of how to correct the error.
51
+ Integration:
52
+
53
+ Ensure the summary is clear and informative for future reference.
54
+ Output Data:
55
+
56
+ Error Summary:
57
+ Error Summary: (Brief description of the error)
58
+ Correction: (Concise explanation of the fix)
59
+ Example Output:
60
+
61
+ Error Summary: The IndexError occurred because the code attempted to access an element at an index that is out of range for the list.
62
+ Correction: Ensure the index is within the bounds of the list. For example, use if index < len(my_list): to check the index before accessing the list element.
63
+ """
64
+ incorrect_code = dspy.InputField(desc="Error causing code")
65
+ error_metadata = dspy.InputField(desc="The description of the error generated, with user/agent information for context")
66
+ correction = dspy.InputField(desc="Correction suggested by AI or done manually by human")
67
+ summary = dspy.OutputField(desc="The description which must contain information about the error and how to correct it")
68
+
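+ # Invocation sketch (hypothetical): summarizing an agent turn for memory.
+ # summarizer = dspy.Predict(memory_summarize_agent)
+ # note = summarizer(agent_response=code_and_commentary, user_goal=query).summary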
src/agents/retrievers/retrievers.py ADDED
@@ -0,0 +1,153 @@
1
+ # This file handles the data-preprocessing and creates retrievers
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from datetime import datetime
6
+
7
+ # instructions also stored here
8
+ instructions ="""
9
+ Here are the instructions for the AI system with the specified agents:
10
+
11
+ ### AI System Instructions
12
+
13
+ #### Agents
14
+ - `@data_viz_agent`: Handles queries related to data visualization.
15
+ - `@sk_learn_agent`: Handles queries related to machine learning using scikit-learn.
16
+ - `@statistical_analytics_agent`: Handles queries related to statistical analysis.
17
+ - `@preprocessing_agent`: Handles queries related to data preprocessing.
18
+
19
+ #### Query Routing
20
+
21
+ 1. **Direct Agent Routing**:
22
+ - If the user specifies an agent in their query using `@agent_name`, the query will be directly routed to the specified agent.
23
+ - Example: `@data_viz_agent Create a bar chart from the following data.`
24
+
25
+ 2. **Planner-Based Routing**:
26
+ - If the user does not specify an agent, the query will be routed to the system's planner.
27
+ - The planner will analyze the query and determine the most appropriate agent to handle the request.
28
+ - Example: `Generate a confusion matrix from this dataset.`
29
+
30
+ PLEASE READ THE INSTRUCTIONS! Thank you
31
+ """
32
+
33
+ # For every column, collects useful information such as the top 10 categories, or min/max/mean where applicable
34
+ def return_vals(df, c):
35
+ if isinstance(df[c].iloc[10], (int, float, complex)):
36
+ return {'max_value': max(df[c]), 'min_value': min(df[c]), 'mean_value': np.mean(df[c])}
37
+ elif isinstance(df[c].iloc[10], datetime):
38
+ # Return a dict (not a set) so all branches share the same structure
+ return {'max_value': str(max(df[c])), 'min_value': str(min(df[c])), 'mean_value': str(np.mean(df[c]))}
39
+ else:
40
+ return {'top_10_values': df[c].value_counts()[:10], 'total_category_count': len(df[c].unique())}
41
+
42
+ # Removes ',' from numeric columns stored as strings
43
+ def correct_num(df, c):
44
+ try:
45
+ df[c] = df[c].fillna('0').str.replace(',','').astype(float)
46
+ return df[c]
47
+ except Exception:
48
+ # Column isn't a comma-formatted numeric string; return it unchanged
+ return df[c]
49
+
50
+
51
+
52
+ # does most of the pre-processing
53
+ def make_data(df, desc):
54
+ dict_ = {}
55
+ dict_['df_name'] = "The data is loaded as df"
56
+ dict_['Description'] = desc
57
+ dict_['dataframe_head_view'] = df.head(2).to_markdown()
58
+ # dict_['all_column_names'] = str(list(df.columns[:20]))
59
+
60
+ # for c in df.columns:
61
+ # df[c] = correct_num(df,c)
62
+ # try:
63
+ # dict_[c] = {'column_name':c,'type':str(type(df[c].iloc[0])), 'column_information':return_vals(df,c)}
64
+ # except:
65
+ # dict_[c] = {'column_name':c,'type':str(type(df[c].iloc[0])), 'column_information':'NA'}
66
+ return dict_
67
+
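+ # Example (hypothetical call): make_data(df, "Housing prices dataset") returns roughly
+ # {'df_name': 'The data is loaded as df',
+ #  'Description': 'Housing prices dataset',
+ #  'dataframe_head_view': <markdown table of df.head(2)>}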
68
+
69
+
70
+ # Stored styling instructions for the data_viz_agent; these help it generate well-formatted graphs
71
+ styling_instructions = [
72
+ """
73
+ Don't ignore any of these instructions.
74
+ For a line chart always use the plotly_white template; reduce the x- and y-axis line width to 0.2 and the x & y grid width to 1.
75
+ Always give a title and make the axis labels bold using HTML tags; use multiple colors if there is more than one line.
76
+ Annotate the min and max of the line.
77
+ Display numbers in thousands (K) or millions (M) if larger than 1,000/1,000,000.
78
+ Show percentages to 2 decimal points with a '%' sign.
79
+ The default chart size should be height=1200 and width=1000.
80
+
81
+ """
82
+
83
+ , """
84
+ Don't ignore any of these instructions.
85
+ For a bar chart always use the plotly_white template; reduce the x- and y-axis line width to 0.2 and the x & y grid width to 1.
86
+ Always give a title and make the axis labels bold using HTML tags.
87
+ Always display numbers in thousands (K) or millions (M) if larger than 1,000/1,000,000.
88
+ Annotate the values on the bars.
89
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
90
+ The default chart size should be height=1200 and width=1000.
91
+ """
92
+ ,
93
+
94
+ """
95
+ For a histogram chart choose a bin size of 50.
96
+ Do not ignore any of these instructions.
97
+ Always use the plotly_white template; reduce the x & y axis line width to 0.2 and the x & y grid width to 1.
98
+ Always give a title and make the axis labels bold using HTML tags.
99
+ Always display numbers in thousands (K) or millions (M) if larger than 1,000/1,000,000. Add annotations for x values.
100
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
101
+ The default chart size should be height=1200 and width=1000.
102
+ """,
103
+
104
+
105
+ """
106
+ For a pie chart only show the top 10 categories; bundle the rest as 'Others'.
107
+ Do not ignore any of these instructions.
108
+ Always use the plotly_white template; reduce the x & y axis line width to 0.2 and the x & y grid width to 1.
109
+ Always give a title and make the axis labels bold using HTML tags.
110
+ Always display numbers in thousands (K) or millions (M) if larger than 1,000/1,000,000. Add annotations for x values.
111
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
112
+ The default chart size should be height=1200 and width=1000.
113
+ """,
114
+
115
+ """
116
+ Do not ignore any of these instructions.
117
+ Always use the plotly_white template; reduce the x & y axis line width to 0.2 and the x & y grid width to 1.
118
+ Always give a title and make the axis labels bold using HTML tags.
119
+ Always display numbers in thousands (K) or millions (M) if larger than 1,000/1,000,000. Add annotations for x values.
120
+ Don't add K/M if the number is already comma-formatted or the value is not a number.
121
+ If the variable is a percentage, show it to 2 decimal points with a '%' sign.
122
+ The default chart size should be height=1200 and width=1000.
123
+ """,
124
+ """
125
+ For a heat map:
126
+ Use the 'plotly_white' template for a clean, white background.
127
+ Set a chart title.
128
+ Style the X-axis with a black line color, 0.2 line width, and 1 grid width; format 1000/1000000 as K/M.
129
+ Do not format non-numeric values.
130
+ Style the Y-axis with a black line color, 0.2 line width, and 1 grid width; format 1000/1000000 as K/M.
131
+ Do not format non-numeric values.
132
+
133
+ Set the figure dimensions to a height of 1200 pixels and a width of 1000 pixels.
134
+ """,
135
+ """
136
+ For a histogram used for returns/distribution plotting:
137
+ Use the 'plotly_white' template for a clean, white background.
138
+ Set a chart title.
139
+ Style the X-axis with 1 grid width; format 1000/1000000 as K/M.
140
+ Do not format non-numeric values.
141
+ Style the Y-axis with 1 grid width; format 1000/1000000 as K/M.
142
+ Do not format non-numeric values.
143
+
144
+ Use an opacity of 0.75
145
+
146
+ Set the figure dimensions to a height of 1200 pixels and a width of 1000 pixels.
147
+ """
148
+
149
+ ]
150
+
151
+
152
+
153
+
src/db/__init__.py ADDED
File without changes
src/db/init_db.py ADDED
@@ -0,0 +1,68 @@
1
+ import logging
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from sqlalchemy import create_engine, event
5
+ from sqlalchemy.orm import sessionmaker
6
+ from src.db.schemas.models import Base
7
+ from src.utils.logger import Logger
8
+
9
+ logger = Logger("init_db", see_time=True, console_log=True)
10
+ load_dotenv()
11
+
12
+ # Create the database engine based on environment variable
13
+ DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///chat_database.db")
14
+
15
+ # Determine database type and set appropriate engine configurations
16
+ if DATABASE_URL.startswith('postgresql'):
17
+ # PostgreSQL-specific configuration
18
+ engine = create_engine(
19
+ DATABASE_URL,
20
+ pool_size=10,
21
+ max_overflow=20,
22
+ pool_pre_ping=True, # Check connection validity before use
23
+ pool_recycle=300 # Recycle connections after 5 minutes
24
+ )
25
+ is_postgresql = True
26
+ logger.log_message("Using PostgreSQL database engine", logging.INFO)
27
+ else:
28
+ # SQLite configuration
29
+ engine = create_engine(DATABASE_URL)
30
+ is_postgresql = False
31
+ # For SQLite, enable foreign key constraints
32
+ @event.listens_for(engine, "connect")
33
+ def set_sqlite_pragma(dbapi_connection, connection_record):
34
+ cursor = dbapi_connection.cursor()
35
+ cursor.execute("PRAGMA foreign_keys=ON")
36
+ cursor.close()
37
+ logger.log_message("Using SQLite database engine", logging.INFO)
38
+
39
+ # Create session factory
40
+ Session = sessionmaker(bind=engine)
41
+ session_factory = Session
42
+
43
+ # Database initialization function
44
+ def init_db():
45
+ # Create all tables
46
+ Base.metadata.create_all(engine)
47
+ logger.log_message("Database and tables created successfully.", logging.INFO)
48
+ logger.log_message(f"Models: {Base.metadata.tables.keys()}", logging.INFO)
49
+
50
+ # Utility function to get a new session
51
+ def get_session():
52
+ return Session()
53
+
54
+ def get_db():
55
+ db = Session()
56
+ try:
57
+ yield db
58
+ except Exception as e:
59
+ logger.log_message(f"Error getting database session: {e}", logging.ERROR)
60
+ finally:
61
+ db.close()
62
+
63
+ # Add function to check if using PostgreSQL
64
+ def is_postgres_db():
65
+ return is_postgresql
66
+
67
+ if __name__ == "__main__":
68
+ init_db()
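+ # Usage sketch: create the tables, then open and close a session.
+ # from src.db.init_db import init_db, get_session
+ # init_db()
+ # session = get_session()
+ # try:
+ #     ...  # run queries
+ # finally:
+ #     session.close()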
src/db/schemas/__init__.py ADDED
File without changes
src/db/schemas/models.py ADDED
@@ -0,0 +1,237 @@
1
+ from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, DateTime, Text, Float, Boolean, JSON, UniqueConstraint
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker, relationship
4
+ from datetime import datetime, UTC
5
+
6
+ # Define the base class for declarative models
7
+ Base = declarative_base()
8
+
9
+ # Define the Users table
10
+ class User(Base):
11
+ __tablename__ = 'users'
12
+
13
+ user_id = Column(Integer, primary_key=True, autoincrement=True)
14
+ username = Column(String, unique=True, nullable=False)
15
+ email = Column(String, unique=True, nullable=False)
16
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
17
+ # Add relationship for cascade options
18
+ chats = relationship("Chat", back_populates="user", cascade="all, delete-orphan")
19
+ usage_records = relationship("ModelUsage", back_populates="user")
20
+ deep_analysis_reports = relationship("DeepAnalysisReport", back_populates="user", cascade="all, delete-orphan")
21
+ template_preferences = relationship("UserTemplatePreference", back_populates="user", cascade="all, delete-orphan")
22
+
23
+ # Define the Chats table
24
+ class Chat(Base):
25
+ __tablename__ = 'chats'
26
+
27
+ chat_id = Column(Integer, primary_key=True, autoincrement=True)
28
+ user_id = Column(Integer, ForeignKey('users.user_id', ondelete="CASCADE"), nullable=True)
29
+ title = Column(String, default='New Chat')
30
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
31
+ # Add relationships for cascade options
32
+ user = relationship("User", back_populates="chats")
33
+ messages = relationship("Message", back_populates="chat", cascade="all, delete-orphan")
34
+ usage_records = relationship("ModelUsage", back_populates="chat")
35
+
36
+ # Define the Messages table
37
+ class Message(Base):
38
+ __tablename__ = 'messages'
39
+
40
+ message_id = Column(Integer, primary_key=True, autoincrement=True)
41
+ chat_id = Column(Integer, ForeignKey('chats.chat_id', ondelete="CASCADE"), nullable=False)
42
+ sender = Column(String, nullable=False) # 'user' or 'ai'
43
+ content = Column(Text, nullable=False)
44
+ timestamp = Column(DateTime, default=lambda: datetime.now(UTC))
45
+ # Add relationship for cascade options
46
+ chat = relationship("Chat", back_populates="messages")
47
+ feedback = relationship("MessageFeedback", back_populates="message", uselist=False, cascade="all, delete-orphan")
48
+
49
+ # Define the Model Usage table
50
+ class ModelUsage(Base):
51
+ """Tracks AI model usage metrics for analytics and billing purposes."""
52
+ __tablename__ = 'model_usage'
53
+
54
+ usage_id = Column(Integer, primary_key=True)
55
+ user_id = Column(Integer, ForeignKey('users.user_id', ondelete="SET NULL"), nullable=True)
56
+ chat_id = Column(Integer, ForeignKey('chats.chat_id', ondelete="SET NULL"), nullable=True)
57
+ model_name = Column(String(100), nullable=False)
58
+ provider = Column(String(50), nullable=False)
59
+ prompt_tokens = Column(Integer, default=0)
60
+ completion_tokens = Column(Integer, default=0)
61
+ total_tokens = Column(Integer, default=0)
62
+ query_size = Column(Integer, default=0) # Size in characters
63
+ response_size = Column(Integer, default=0) # Size in characters
64
+ cost = Column(Float, default=0.0) # Cost in USD
65
+ timestamp = Column(DateTime, default=lambda: datetime.now(UTC))
66
+ is_streaming = Column(Boolean, default=False)
67
+ request_time_ms = Column(Integer, default=0) # Request processing time in milliseconds
68
+ # Add relationships
69
+ user = relationship("User", back_populates="usage_records")
70
+ chat = relationship("Chat", back_populates="usage_records")
71
+
72
+ # Define the Code Execution table
73
+ class CodeExecution(Base):
74
+ """Tracks code execution attempts and results for analysis and debugging."""
75
+ __tablename__ = 'code_executions'
76
+
77
+ execution_id = Column(Integer, primary_key=True, autoincrement=True)
78
+ message_id = Column(Integer, ForeignKey('messages.message_id', ondelete="CASCADE"), nullable=True)
79
+ chat_id = Column(Integer, ForeignKey('chats.chat_id', ondelete="CASCADE"), nullable=True)
80
+ user_id = Column(Integer, ForeignKey('users.user_id', ondelete="SET NULL"), nullable=True)
81
+
82
+ # Code tracking
83
+ initial_code = Column(Text, nullable=True) # First version of code submitted
84
+ latest_code = Column(Text, nullable=True) # Most recent version of code
85
+
86
+ # Execution results
87
+ is_successful = Column(Boolean, default=False)
88
+ output = Column(Text, nullable=True) # Full output including errors
89
+
90
+ # Model and agent information
91
+ model_provider = Column(String(50), nullable=True)
92
+ model_name = Column(String(100), nullable=True)
93
+ model_temperature = Column(Float, nullable=True)
94
+ model_max_tokens = Column(Integer, nullable=True)
95
+
96
+ # Failure information
97
+ failed_agents = Column(Text, nullable=True) # JSON list of agent names that failed
98
+ error_messages = Column(Text, nullable=True) # JSON map of error messages by agent
99
+
100
+ # Metadata
101
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
102
+ updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
103
+
104
+ class MessageFeedback(Base):
105
+ """Tracks user feedback and model settings for each message."""
106
+ __tablename__ = 'message_feedback'
107
+
108
+ feedback_id = Column(Integer, primary_key=True, autoincrement=True)
109
+ message_id = Column(Integer, ForeignKey('messages.message_id', ondelete="CASCADE"), nullable=False)
110
+
111
+ # User feedback
112
+ rating = Column(Integer, nullable=True) # Star rating (1-5)
113
+
114
+ # Model settings used for this message
115
+ model_name = Column(String(100), nullable=True)
116
+ model_provider = Column(String(50), nullable=True)
117
+ temperature = Column(Float, nullable=True)
118
+ max_tokens = Column(Integer, nullable=True)
119
+
120
+ # Metadata
121
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
122
+ updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
123
+
124
+ # Relationship
125
+ message = relationship("Message", back_populates="feedback")
126
+
127
+ class DeepAnalysisReport(Base):
128
+ """Stores deep analysis reports with comprehensive analysis data and metadata."""
129
+ __tablename__ = 'deep_analysis_reports'
130
+
131
+ report_id = Column(Integer, primary_key=True, autoincrement=True)
132
+ report_uuid = Column(String(100), unique=True, nullable=False) # Frontend generated ID
133
+ user_id = Column(Integer, ForeignKey('users.user_id', ondelete="CASCADE"), nullable=True)
134
+
135
+ # Analysis objective and status
136
+ goal = Column(Text, nullable=False) # The analysis objective/question
137
+ status = Column(String(20), nullable=False, default='pending') # 'pending', 'running', 'completed', 'failed'
138
+
139
+ # Timing information
140
+ start_time = Column(DateTime, default=lambda: datetime.now(UTC))
141
+ end_time = Column(DateTime, nullable=True)
142
+ duration_seconds = Column(Integer, nullable=True) # Calculated duration
143
+
144
+ # Analysis components (stored as text/JSON)
145
+ deep_questions = Column(Text, nullable=True) # Generated analytical questions
146
+ deep_plan = Column(Text, nullable=True) # Analysis plan
147
+ summaries = Column(JSON, nullable=True) # Array of analysis summaries
148
+ analysis_code = Column(Text, nullable=True) # Generated Python code
149
+ plotly_figures = Column(JSON, nullable=True) # Array of Plotly figure data
150
+ synthesis = Column(JSON, nullable=True) # Array of synthesis insights
151
+ final_conclusion = Column(Text, nullable=True) # Final analysis conclusion
152
+
153
+ # Report output
154
+ html_report = Column(Text, nullable=True) # Complete HTML report
155
+ report_summary = Column(Text, nullable=True) # Brief summary for listing
156
+
157
+ # Execution tracking
158
+ progress_percentage = Column(Integer, default=0) # Progress 0-100
159
+ steps_completed = Column(JSON, nullable=True) # Array of completed step names
160
+ error_message = Column(Text, nullable=True) # Error details if failed
161
+
162
+ # Model and cost tracking
163
+ model_provider = Column(String(50), nullable=True)
164
+ model_name = Column(String(100), nullable=True)
165
+ total_tokens_used = Column(Integer, default=0)
166
+ estimated_cost = Column(Float, default=0.0) # Cost in USD
167
+ credits_consumed = Column(Integer, default=0) # Credits deducted for this analysis
168
+
169
+ # Metadata
170
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
171
+ updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
172
+
173
+ # Relationships
174
+ user = relationship("User", back_populates="deep_analysis_reports")
175
+
176
+ class AgentTemplate(Base):
177
+ """Stores predefined agent templates that users can enable/disable."""
178
+ __tablename__ = 'agent_templates'
179
+
180
+ template_id = Column(Integer, primary_key=True, autoincrement=True)
181
+
182
+ # Template definition
183
+ template_name = Column(String(100), nullable=False, unique=True) # e.g., 'pytorch_specialist', 'data_cleaning_expert'
184
+ display_name = Column(String(200), nullable=True) # User-friendly display name
185
+ description = Column(Text, nullable=False) # Short description for template selection
186
+ prompt_template = Column(Text, nullable=False) # Main prompt/instructions for agent behavior
187
+
188
+ # Template appearance
189
+ icon_url = Column(String(500), nullable=True) # URL to template icon (CDN, data URL, or relative path)
190
+
191
+ # Template categorization
192
+ category = Column(String(50), nullable=True) # 'Visualization', 'Modelling', 'Data Manipulation'
193
+ is_premium_only = Column(Boolean, default=False) # True if template requires premium subscription
194
+
195
+ # Agent variant support
196
+ variant_type = Column(String(20), default='individual') # 'planner', 'individual', or 'both'
197
+ base_agent = Column(String(100), nullable=True) # Base agent name for variants (e.g., 'preprocessing_agent')
198
+
199
+ # Status and metadata
200
+ is_active = Column(Boolean, default=True)
201
+
202
+ # Timestamps
203
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
204
+ updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
205
+
206
+ # Relationships
207
+ user_preferences = relationship("UserTemplatePreference", back_populates="template", cascade="all, delete-orphan")
208
+
209
+ class UserTemplatePreference(Base):
210
+ """Tracks user preferences and usage for agent templates."""
211
+ __tablename__ = 'user_template_preferences'
212
+
213
+ preference_id = Column(Integer, primary_key=True, autoincrement=True)
214
+ user_id = Column(Integer, ForeignKey('users.user_id', ondelete="CASCADE"), nullable=False)
215
+ template_id = Column(Integer, ForeignKey('agent_templates.template_id', ondelete="CASCADE"), nullable=False)
216
+
217
+ # User preferences
218
+ is_enabled = Column(Boolean, default=True) # Whether user has this template enabled
219
+
220
+ # Usage tracking
221
+ usage_count = Column(Integer, default=0) # Track how many times user has used this template
222
+ last_used_at = Column(DateTime, nullable=True) # Last time user used this template
223
+
224
+ # Timestamps
225
+ created_at = Column(DateTime, default=lambda: datetime.now(UTC))
226
+ updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
227
+
228
+ # Relationships
229
+ user = relationship("User", back_populates="template_preferences")
230
+ template = relationship("AgentTemplate", back_populates="user_preferences")
231
+
232
+ # Constraints - user can only have one preference record per template
233
+ __table_args__ = (
234
+ UniqueConstraint('user_id', 'template_id', name='unique_user_template_preference'),
235
+ )
236
+
237
+
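To illustrate how AgentTemplate and UserTemplatePreference fit together, here is a minimal sketch of an enable-template helper. It assumes the `session_factory` from `src.db.init_db` (imported elsewhere in this commit); the helper name `enable_template_for_user` is hypothetical, and the `unique_user_template_preference` constraint guarantees at most one preference row per user/template pair.

    from datetime import datetime, UTC
    from src.db.init_db import session_factory  # assumed, as used in src/managers/ai_manager.py
    from src.db.schemas.models import UserTemplatePreference

    def enable_template_for_user(user_id: int, template_id: int):
        """Hypothetical helper: enable a template, creating the preference row if needed."""
        session = session_factory()
        try:
            pref = session.query(UserTemplatePreference).filter_by(
                user_id=user_id, template_id=template_id
            ).first()
            if pref is None:
                # The unique (user_id, template_id) constraint keeps this one row per pair
                pref = UserTemplatePreference(user_id=user_id, template_id=template_id)
                session.add(pref)
            pref.is_enabled = True
            pref.updated_at = datetime.now(UTC)
            session.commit()
            return pref
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()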
src/managers/ai_manager.py ADDED
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from typing import Optional, Dict, Any
3
+ from src.db.schemas.models import ModelUsage
4
+ from src.db.init_db import session_factory
5
+ from datetime import datetime, UTC
6
+ from src.routes.analytics_routes import handle_new_model_usage
7
+ import asyncio
8
+
9
+ from src.utils.logger import Logger
10
+ from src.utils.model_registry import get_provider_for_model, calculate_cost
11
+
12
+ logger = Logger(name="ai_manager", see_time=True, console_log=True)
13
+
14
+ class AI_Manager:
15
+ """Manages AI model interactions and usage tracking"""
16
+
17
+ def __init__(self):
18
+ self.tokenizer = None
19
+ # Initialize tokenizer - could use tiktoken or another tokenizer
20
+ try:
21
+ import tiktoken
22
+ self.tokenizer = tiktoken.get_encoding("cl100k_base")
23
+ except ImportError:
24
+ logger.log_message("Tiktoken not available, using simple tokenizer", level=logging.WARNING)
25
+ self.tokenizer = SimpleTokenizer()
26
+
27
+ def save_usage_to_db(self, user_id, chat_id, model_name, provider,
28
+ prompt_tokens, completion_tokens, total_tokens,
29
+ query_size, response_size, cost, request_time_ms,
30
+ is_streaming=False):
31
+ """Save model usage data to the database"""
32
+ session = session_factory()
33
+ try:
34
+
35
+ usage = ModelUsage(
36
+ user_id=user_id,
37
+ chat_id=chat_id,
38
+ model_name=model_name,
39
+ provider=provider,
40
+ prompt_tokens=prompt_tokens,
41
+ completion_tokens=completion_tokens,
42
+ total_tokens=total_tokens,
43
+ query_size=query_size,
44
+ response_size=response_size,
45
+ cost=cost,
46
+ timestamp=datetime.now(UTC),
47
+ is_streaming=is_streaming,
48
+ request_time_ms=request_time_ms
49
+ )
50
+
51
+ session.add(usage)
52
+ session.commit()
53
+ # logger.info(f"Saved usage data to database for chat {chat_id}: {total_tokens} tokens, ${cost:.6f}")
54
+
55
+ # Broadcast the event asynchronously; create_task requires a running event loop
56
+ try:
+ asyncio.create_task(handle_new_model_usage(usage))
+ except RuntimeError:
+ logger.log_message("No running event loop; skipping usage broadcast", level=logging.WARNING)
57
+
58
+ except Exception as e:
59
+ session.rollback()
60
+ logger.log_message(f"Error saving usage data to database for chat {chat_id}: {str(e)}", level=logging.ERROR)
61
+ finally:
62
+ session.close()
63
+
64
+ def calculate_cost(self, model_name, input_tokens, output_tokens):
65
+ """Calculate the cost for using the model based on tokens"""
66
+ if not model_name:
67
+ return 0
68
+
69
+ # Get provider for logging
70
+ model_provider = get_provider_for_model(model_name)
71
+ logger.log_message(f"[> ] Model Name: {model_name}, Model Provider: {model_provider}", level=logging.INFO)
72
+
73
+ # Use the centralized calculate_cost function
74
+ return calculate_cost(model_name, input_tokens, output_tokens)
75
+
76
+ def get_provider_for_model(self, model_name):
77
+ """Determine the provider based on model name"""
78
+ # Use the centralized get_provider_for_model function
79
+ return get_provider_for_model(model_name)
80
+
81
+ class SimpleTokenizer:
82
+ """A very simple tokenizer implementation for fallback"""
83
+ def encode(self, text):
84
+ return len(text.split())
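A minimal usage sketch for AI_Manager, assuming the `src.utils.model_registry` helpers resolve "gpt-4o-mini"; the IDs, token counts, and sizes below are placeholders, not values from the codebase:

    manager = AI_Manager()

    # Token counts normally come from the provider response; these are placeholders
    prompt_tokens, completion_tokens = 1200, 350
    cost = manager.calculate_cost("gpt-4o-mini", prompt_tokens, completion_tokens)

    manager.save_usage_to_db(
        user_id=1, chat_id=42,                       # placeholder IDs
        model_name="gpt-4o-mini",
        provider=manager.get_provider_for_model("gpt-4o-mini"),
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
        query_size=640, response_size=2048,          # character counts, placeholders
        cost=cost, request_time_ms=980,
    )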
src/managers/chat_manager.py ADDED
@@ -0,0 +1,944 @@
1
+ from sqlalchemy import create_engine, func, exists
2
+ from sqlalchemy.orm import sessionmaker, scoped_session
3
+ from sqlalchemy.exc import SQLAlchemyError
4
+ from src.db.schemas.models import Base, User, Chat, Message, ModelUsage, MessageFeedback
5
+ import logging
6
+ from typing import List, Dict, Optional, Any
7
+ from datetime import datetime, UTC
8
+ from src.utils.logger import Logger
9
+ from src.utils.model_registry import MODEL_COSTS
10
+ import re
11
+
12
+ logger = Logger("chat_manager", see_time=True, console_log=False)
13
+
14
+
15
+ class ChatManager:
16
+ """
17
+ Manages chat operations including creating, storing, retrieving, and updating chats and messages.
18
+ Provides an interface between the application and the database for chat-related operations.
19
+ """
20
+
21
+ def __init__(self, db_url):
22
+ """
23
+ Initialize the ChatManager with a database connection.
24
+
25
+ Args:
26
+ db_url: Database connection URL (e.g., a SQLite or PostgreSQL connection string)
27
+ """
28
+ self.engine = create_engine(db_url)
29
+ Base.metadata.create_all(self.engine) # Ensure tables exist
30
+ self.Session = scoped_session(sessionmaker(bind=self.engine))
31
+
32
+ # Add price mappings for different models
33
+ self.model_costs = MODEL_COSTS
34
+
35
+
36
+ # Add model providers mapping
37
+ self.model_providers = {
38
+ "gpt-": "openai",
39
+ "claude-": "anthropic",
40
+ "llama-": "groq",
41
+ "mistral-": "groq",
42
+ }
43
+
44
+ def create_chat(self, user_id: Optional[int] = None) -> Dict[str, Any]:
45
+ """
46
+ Create a new chat session.
47
+
48
+ Args:
49
+ user_id: Optional user ID if authentication is enabled
50
+
51
+ Returns:
52
+ Dictionary containing chat information
53
+ """
54
+ session = self.Session()
55
+ try:
56
+ # Create a new chat
57
+ chat = Chat(
58
+ user_id=user_id,
59
+ title='New Chat',
60
+ created_at=datetime.now(UTC)
61
+ )
62
+ session.add(chat)
63
+ session.flush() # Flush to get the ID before commit
64
+
65
+ chat_id = chat.chat_id # Get the ID now
66
+ session.commit()
67
+
68
+ logger.log_message(f"Created new chat {chat_id} for user {user_id}", level=logging.INFO)
69
+
70
+ return {
71
+ "chat_id": chat_id,
72
+ "user_id": chat.user_id,
73
+ "title": chat.title,
74
+ "created_at": chat.created_at.isoformat()
75
+ }
76
+ except SQLAlchemyError as e:
77
+ session.rollback()
78
+ logger.log_message(f"Error creating chat: {str(e)}", level=logging.ERROR)
79
+ raise
80
+ finally:
81
+ session.close()
82
+
83
+ def add_message(self, chat_id: int, content: str, sender: str, user_id: Optional[int] = None) -> Dict[str, Any]:
84
+ """
85
+ Add a message to a chat.
86
+
87
+ Args:
88
+ chat_id: ID of the chat to add the message to
89
+ content: Message content
90
+ sender: Message sender ('user' or 'ai')
91
+ user_id: Optional user ID to verify ownership
92
+
93
+ Returns:
94
+ Dictionary containing message information
95
+ """
96
+ session = self.Session()
97
+ try:
98
+ # Check if chat exists and belongs to the user if user_id is provided
99
+ query = session.query(Chat).filter(Chat.chat_id == chat_id)
100
+ if user_id is not None:
101
+ query = query.filter((Chat.user_id == user_id) | (Chat.user_id.is_(None)))
102
+
103
+ chat = query.first()
104
+ if not chat:
105
+ raise ValueError(f"Chat with ID {chat_id} not found or access denied")
106
+
107
+ ##! Ensure content length is reasonable for PostgreSQL
108
+ # max_content_length = 10000 # PostgreSQL can handle large text but let's be cautious
109
+ # if content and len(content) > max_content_length:
110
+ # logger.log_message(f"Truncating message content from {len(content)} to {max_content_length} characters",
111
+ # level=logging.WARNING)
112
+ # content = content[:max_content_length]
113
+
114
+ # Create a new message
115
+ message = Message(
116
+ chat_id=chat_id,
117
+ content=content,
118
+ sender=sender,
119
+ timestamp=datetime.now(UTC)
120
+ )
121
+ session.add(message)
122
+ session.flush() # Flush to get the ID before commit
123
+
124
+ message_id = message.message_id # Get ID now
125
+
126
+ # If this is the first AI response and chat title is still default,
127
+ # update the chat title based on the first user query
128
+ if sender == 'ai':
129
+ first_ai_message = session.query(Message).filter(
130
+ Message.chat_id == chat_id,
131
+ Message.sender == 'ai'
132
+ ).first()
133
+
134
+ if not first_ai_message and chat.title == 'New Chat':
135
+ # Get the user's first message
136
+ first_user_message = session.query(Message).filter(
137
+ Message.chat_id == chat_id,
138
+ Message.sender == 'user'
139
+ ).order_by(Message.timestamp).first()
140
+
141
+ if first_user_message:
142
+ # Generate title from user query
143
+ new_title = self.generate_title_from_query(first_user_message.content)
144
+ chat.title = new_title
145
+
146
+ session.commit()
147
+
148
+ return {
149
+ "message_id": message_id,
150
+ "chat_id": message.chat_id,
151
+ "content": message.content,
152
+ "sender": message.sender,
153
+ "timestamp": message.timestamp.isoformat()
154
+ }
155
+ except SQLAlchemyError as e:
156
+ session.rollback()
157
+ logger.log_message(f"Error adding message: {str(e)}", level=logging.ERROR)
158
+ raise
159
+ finally:
160
+ session.close()
161
+
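Taken together, create_chat and add_message cover the basic write path. A hedged sketch (the SQLite URL matches the .env template default; user_id=1 is a placeholder and must reference an existing user, or be None):

    cm = ChatManager(db_url="sqlite:///chat_database.db")

    chat = cm.create_chat(user_id=1)
    cm.add_message(chat["chat_id"], "Which features drive house prices?", sender="user")
    # The first AI reply also triggers the automatic title update below
    reply = cm.add_message(chat["chat_id"], "### Summary\nPrice tracks area closely...", sender="ai")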
162
+
163
+ def get_chat(self, chat_id: int, user_id: Optional[int] = None) -> Dict[str, Any]:
164
+ """
165
+ Get a chat by ID with all its messages.
166
+
167
+ Args:
168
+ chat_id: ID of the chat to retrieve
169
+ user_id: Optional user ID to verify ownership
170
+
171
+ Returns:
172
+ Dictionary containing chat information and messages
173
+ """
174
+ session = self.Session()
175
+ try:
176
+ # Get the chat
177
+ query = session.query(Chat).filter(Chat.chat_id == chat_id)
178
+
179
+ # If user_id is provided, ensure the chat belongs to this user
180
+ if user_id is not None:
181
+ query = query.filter((Chat.user_id == user_id) | (Chat.user_id.is_(None)))
182
+
183
+ chat = query.first()
184
+ if not chat:
185
+ raise ValueError(f"Chat with ID {chat_id} not found or access denied")
186
+
187
+ # Get the chat messages ordered by timestamp
188
+ messages = session.query(Message).filter(
189
+ Message.chat_id == chat_id
190
+ ).order_by(Message.timestamp).all()
191
+
192
+ # Create a safe serializable dictionary
193
+ return {
194
+ "chat_id": chat.chat_id,
195
+ "title": chat.title,
196
+ "created_at": chat.created_at.isoformat() if chat.created_at else None,
197
+ "user_id": chat.user_id,
198
+ "messages": [
199
+ {
200
+ "message_id": msg.message_id,
201
+ "chat_id": msg.chat_id,
202
+ "content": msg.content,
203
+ "sender": msg.sender,
204
+ "timestamp": msg.timestamp.isoformat() if msg.timestamp else None
205
+ } for msg in messages
206
+ ]
207
+ }
208
+ except SQLAlchemyError as e:
209
+ logger.log_message(f"Error retrieving chat: {str(e)}", level=logging.ERROR)
210
+ raise
211
+ finally:
212
+ session.close()
213
+
214
+ def get_user_chats(self, user_id: Optional[int] = None, limit: int = 10, offset: int = 0) -> List[Dict[str, Any]]:
215
+ """
216
+ Get recent chats for a user, or all chats if no user_id is provided.
217
+
218
+ Args:
219
+ user_id: Optional user ID to filter chats
220
+ limit: Maximum number of chats to return
221
+ offset: Number of chats to skip (for pagination)
222
+
223
+ Returns:
224
+ List of dictionaries containing chat information
225
+ """
226
+ session = self.Session()
227
+ try:
228
+ query = session.query(Chat)
229
+
230
+ # Filter by user_id if provided
231
+ if user_id is not None:
232
+ query = query.filter(Chat.user_id == user_id)
233
+
234
+ # Apply safe limits for both SQLite and PostgreSQL
235
+ safe_limit = min(max(1, limit), 100) # Between 1 and 100
236
+ safe_offset = max(0, offset) # At least 0
237
+
238
+ chats = query.order_by(Chat.created_at.desc()).limit(safe_limit).offset(safe_offset).all()
239
+
240
+ return [
241
+ {
242
+ "chat_id": chat.chat_id,
243
+ "user_id": chat.user_id,
244
+ "title": chat.title,
245
+ "created_at": chat.created_at.isoformat() if chat.created_at else None
246
+ } for chat in chats
247
+ ]
248
+ except SQLAlchemyError as e:
249
+ logger.log_message(f"Error retrieving chats: {str(e)}", level=logging.ERROR)
250
+ return []
251
+ finally:
252
+ session.close()
253
+
254
+ def delete_chat(self, chat_id: int, user_id: Optional[int] = None) -> bool:
255
+ """
256
+ Delete a chat and all its messages while preserving model usage records.
257
+
258
+ Args:
259
+ chat_id: ID of the chat to delete
260
+ user_id: Optional user ID to verify ownership
261
+
262
+ Returns:
263
+ True if deletion was successful, False otherwise
264
+ """
265
+ session = self.Session()
266
+ try:
267
+ # Fetch chat with ownership check if user_id provided
268
+ query = session.query(Chat).filter(Chat.chat_id == chat_id)
269
+ if user_id is not None:
270
+ query = query.filter(Chat.user_id == user_id)
271
+
272
+ chat = query.first()
273
+ if not chat:
274
+ return False # Chat not found or ownership mismatch
275
+
276
+ # ORM-based deletion with model_usage preservation
277
+ # The SET NULL in the foreign key should handle this, but we ensure it explicitly for both
278
+ # SQLite and PostgreSQL compatibility
279
+
280
+ # SQLite does not always enforce ondelete="SET NULL" (foreign keys can be disabled), so set it explicitly:
281
+ # Update model_usage records to set chat_id to NULL
282
+ session.query(ModelUsage).filter(ModelUsage.chat_id == chat_id).update(
283
+ {"chat_id": None}, synchronize_session=False
284
+ )
285
+
286
+ # Now delete the chat - relationships will handle cascading to messages
287
+ session.delete(chat)
288
+ session.commit()
289
+ return True
290
+ except SQLAlchemyError as e:
291
+ session.rollback()
292
+ logger.log_message(f"Error deleting chat: {str(e)}", level=logging.ERROR)
293
+ return False
294
+ finally:
295
+ session.close()
296
+
297
+
298
+
299
+ def get_or_create_user(self, username: str, email: str) -> Dict[str, Any]:
300
+ """
301
+ Get an existing user by email or create a new one if not found.
302
+
303
+ Args:
304
+ username: User's display name
305
+ email: User's email address
306
+
307
+ Returns:
308
+ Dictionary containing user information
309
+ """
310
+ session = self.Session()
311
+ try:
312
+ # Validate and sanitize inputs
313
+ if not email or not isinstance(email, str):
314
+ raise ValueError("Valid email is required")
315
+
316
+ # Limit input length for PostgreSQL compatibility
317
+ max_length = 255 # Standard limit for varchar fields
318
+ if username and len(username) > max_length:
319
+ username = username[:max_length]
320
+ if email and len(email) > max_length:
321
+ email = email[:max_length]
322
+
323
+ # Try to find existing user by email
324
+ user = session.query(User).filter(User.email == email).first()
325
+
326
+ if not user:
327
+ # Create new user if not found
328
+ user = User(username=username, email=email)
329
+ session.add(user)
330
+ session.flush() # Get ID before committing
331
+ user_id = user.user_id
332
+ session.commit()
333
+ logger.log_message(f"Created new user: {username} ({email})", level=logging.INFO)
334
+ else:
335
+ user_id = user.user_id
336
+
337
+ return {
338
+ "user_id": user_id,
339
+ "username": user.username,
340
+ "email": user.email,
341
+ "created_at": user.created_at.isoformat() if user.created_at else None
342
+ }
343
+ except SQLAlchemyError as e:
344
+ session.rollback()
345
+ logger.log_message(f"Error getting/creating user: {str(e)}", level=logging.ERROR)
346
+ raise
347
+ finally:
348
+ session.close()
349
+
350
+ def update_chat(self, chat_id: int, title: Optional[str] = None, user_id: Optional[int] = None) -> Dict[str, Any]:
351
+ """
352
+ Update a chat's title or user_id.
353
+
354
+ Args:
355
+ chat_id: ID of the chat to update
356
+ title: New title for the chat (optional)
357
+ user_id: New user ID for the chat (optional)
358
+
359
+ Returns:
360
+ Dictionary containing updated chat information
361
+ """
362
+ session = self.Session()
363
+ try:
364
+ # Get the chat
365
+ chat = session.query(Chat).filter(Chat.chat_id == chat_id).first()
366
+ if not chat:
367
+ raise ValueError(f"Chat with ID {chat_id} not found")
368
+
369
+ # Update fields if provided
370
+ if title is not None:
371
+ # Limit title length for PostgreSQL compatibility
372
+ if len(title) > 255: # Assuming String column has a reasonable length
373
+ title = title[:255]
374
+ chat.title = title
375
+
376
+ if user_id is not None:
377
+ chat.user_id = user_id
378
+
379
+ session.commit()
380
+
381
+ return {
382
+ "chat_id": chat.chat_id,
383
+ "title": chat.title,
384
+ "created_at": chat.created_at.isoformat() if chat.created_at else None,
385
+ "user_id": chat.user_id
386
+ }
387
+ except SQLAlchemyError as e:
388
+ session.rollback()
389
+ logger.log_message(f"Error updating chat: {str(e)}", level=logging.ERROR)
390
+ raise
391
+ finally:
392
+ session.close()
393
+
394
+ def generate_title_from_query(self, query: str) -> str:
395
+ """
396
+ Generate a title for a chat based on the first query.
397
+
398
+ Args:
399
+ query: The user's first query in the chat
400
+
401
+ Returns:
402
+ A generated title string
403
+ """
404
+ try:
405
+ # Validate input
406
+ if not query or not isinstance(query, str):
407
+ return "New Chat"
408
+
409
+ # Simple title generation - take first few words
410
+ words = query.strip().split()
411
+ if len(words) > 3:
412
+ title = "Chat about " + " ".join(words[0:3]) + "..."
413
+ else:
414
+ title = "Chat about " + query.strip()
415
+
416
+ # Limit title length for PostgreSQL compatibility
417
+ max_title_length = 255
418
+ if len(title) > max_title_length:
419
+ title = title[:max_title_length-3] + "..."
420
+
421
+ return title
422
+ except Exception as e:
423
+ logger.log_message(f"Error generating title: {str(e)}", level=logging.ERROR)
424
+ return "New Chat"
425
+
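For example, with the logic above a query of more than three words is truncated to its first three:

    cm.generate_title_from_query("plot price against area for houses with basements")
    # -> 'Chat about plot price against...'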
426
+ def delete_empty_chats(self, user_id: Optional[int] = None, is_admin: bool = False) -> int:
427
+ """
428
+ Delete empty chats (chats with no messages) for a user.
429
+
430
+ Args:
431
+ user_id: ID of the user whose empty chats should be deleted
432
+ is_admin: Whether this is an admin user
433
+
434
+ Returns:
435
+ Number of chats deleted
436
+ """
437
+ session = self.Session()
438
+ try:
439
+ # Get all chats for the user
440
+ query = session.query(Chat)
441
+ if user_id is not None:
442
+ query = query.filter(Chat.user_id == user_id)
443
+ elif not is_admin:
444
+ return 0 # Don't delete anything if not a user or admin
445
+
446
+ # Get chats with no messages using a subquery - works in both SQLite and PostgreSQL
447
+ empty_chats = query.filter(
448
+ ~exists().where(Message.chat_id == Chat.chat_id)
449
+ ).all()
450
+
451
+ # Collect chat IDs to delete
452
+ chat_ids = [chat.chat_id for chat in empty_chats]
453
+
454
+ deleted_count = 0
455
+ if chat_ids:
456
+ # Update model_usage records to set chat_id to NULL for any associated usage records
457
+ session.query(ModelUsage).filter(ModelUsage.chat_id.in_(chat_ids)).update(
458
+ {"chat_id": None}, synchronize_session=False
459
+ )
460
+
461
+ # Delete the empty chats one by one to ensure proper relationship handling
462
+ for chat_id in chat_ids:
463
+ chat = session.query(Chat).filter(Chat.chat_id == chat_id).first()
464
+ if chat:
465
+ session.delete(chat)
466
+ deleted_count += 1
467
+
468
+ session.commit()
469
+
470
+ return deleted_count
471
+ except SQLAlchemyError as e:
472
+ session.rollback()
473
+ logger.log_message(f"Error deleting empty chats: {str(e)}", level=logging.ERROR)
474
+ return 0
475
+ finally:
476
+ session.close()
477
+
478
+ def get_usage_summary(self, start_date: Optional[datetime] = None,
479
+ end_date: Optional[datetime] = None) -> Dict[str, Any]:
480
+ """
481
+ Get a summary of model usage including total costs, tokens, and usage by model.
482
+
483
+ Args:
484
+ start_date: Optional start date for the summary period
485
+ end_date: Optional end date for the summary period
486
+
487
+ Returns:
488
+ Dictionary containing usage summary
489
+ """
490
+ session = self.Session()
491
+ try:
492
+ # Build shared date filters; applying them directly to each aggregate query
493
+ # below avoids select_from(base_query.subquery()), which cross-joins the
494
+ # subquery with the model_usage table that the column expressions reference
495
+ date_filters = []
496
+ if start_date:
497
+ date_filters.append(ModelUsage.timestamp >= start_date)
498
+ if end_date:
499
+ date_filters.append(ModelUsage.timestamp <= end_date)
500
+
501
+ # Get summary data using aggregate functions
502
+ summary_query = session.query(
503
+ func.coalesce(func.sum(ModelUsage.cost), 0.0).label("total_cost"),
504
+ func.coalesce(func.sum(ModelUsage.prompt_tokens), 0).label("total_prompt_tokens"),
505
+ func.coalesce(func.sum(ModelUsage.completion_tokens), 0).label("total_completion_tokens"),
506
+ func.coalesce(func.sum(ModelUsage.total_tokens), 0).label("total_tokens"),
507
+ func.count(ModelUsage.usage_id).label("request_count"),
508
+ func.coalesce(func.avg(ModelUsage.request_time_ms), 0.0).label("avg_request_time")
509
+ ).filter(*date_filters)
510
+
511
+ result = summary_query.first()
512
+
513
+ # Get usage breakdown by model - using the same date filters for consistency
514
+ model_query = session.query(
515
+ ModelUsage.model_name,
516
+ func.coalesce(func.sum(ModelUsage.cost), 0.0).label("model_cost"),
517
+ func.coalesce(func.sum(ModelUsage.total_tokens), 0).label("model_tokens"),
518
+ func.count(ModelUsage.usage_id).label("model_requests")
519
+ ).filter(*date_filters).group_by(ModelUsage.model_name)
520
+
521
+ model_breakdown = model_query.all()
522
+
523
+ # Get usage breakdown by provider using the same date filters
524
+ provider_query = session.query(
525
+ ModelUsage.provider,
526
+ func.coalesce(func.sum(ModelUsage.cost), 0.0).label("provider_cost"),
527
+ func.coalesce(func.sum(ModelUsage.total_tokens), 0).label("provider_tokens"),
528
+ func.count(ModelUsage.usage_id).label("provider_requests")
529
+ ).filter(*date_filters).group_by(ModelUsage.provider)
530
+
531
+ provider_breakdown = provider_query.all()
532
+
533
+ # Get top users by cost
534
+ user_query = session.query(
535
+ ModelUsage.user_id,
536
+ func.coalesce(func.sum(ModelUsage.cost), 0.0).label("user_cost"),
537
+ func.coalesce(func.sum(ModelUsage.total_tokens), 0).label("user_tokens"),
538
+ func.count(ModelUsage.usage_id).label("user_requests")
539
+ ).filter(*date_filters).group_by(ModelUsage.user_id).order_by(
540
+ func.sum(ModelUsage.cost).desc()
541
+ ).limit(10)
542
+
543
+ user_breakdown = user_query.all()
544
+
545
+ # Handle the result data carefully to avoid None/NULL issues
546
+ return {
547
+ "summary": {
548
+ "total_cost": float(result.total_cost) if result and result.total_cost is not None else 0.0,
549
+ "total_prompt_tokens": int(result.total_prompt_tokens) if result and result.total_prompt_tokens is not None else 0,
550
+ "total_completion_tokens": int(result.total_completion_tokens) if result and result.total_completion_tokens is not None else 0,
551
+ "total_tokens": int(result.total_tokens) if result and result.total_tokens is not None else 0,
552
+ "request_count": int(result.request_count) if result and result.request_count is not None else 0,
553
+ "avg_request_time_ms": float(result.avg_request_time) if result and result.avg_request_time is not None else 0.0
554
+ },
555
+ "model_breakdown": [
556
+ {
557
+ "model_name": model.model_name,
558
+ "cost": float(model.model_cost) if model.model_cost is not None else 0.0,
559
+ "tokens": int(model.model_tokens) if model.model_tokens is not None else 0,
560
+ "requests": int(model.model_requests) if model.model_requests is not None else 0
561
+ } for model in model_breakdown
562
+ ],
563
+ "provider_breakdown": [
564
+ {
565
+ "provider": provider.provider,
566
+ "cost": float(provider.provider_cost) if provider.provider_cost is not None else 0.0,
567
+ "tokens": int(provider.provider_tokens) if provider.provider_tokens is not None else 0,
568
+ "requests": int(provider.provider_requests) if provider.provider_requests is not None else 0
569
+ } for provider in provider_breakdown
570
+ ],
571
+ "top_users": [
572
+ {
573
+ "user_id": user.user_id,
574
+ "cost": float(user.user_cost) if user.user_cost is not None else 0.0,
575
+ "tokens": int(user.user_tokens) if user.user_tokens is not None else 0,
576
+ "requests": int(user.user_requests) if user.user_requests is not None else 0
577
+ } for user in user_breakdown
578
+ ]
579
+ }
580
+
581
+ except SQLAlchemyError as e:
582
+ logger.log_message(f"Error retrieving usage summary: {str(e)}", level=logging.ERROR)
583
+ return {
584
+ "summary": {
585
+ "total_cost": 0.0,
586
+ "total_tokens": 0,
587
+ "request_count": 0
588
+ },
589
+ "model_breakdown": [],
590
+ "provider_breakdown": [],
591
+ "top_users": []
592
+ }
593
+ finally:
594
+ session.close()
595
+
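A sketch of querying the last 30 days of usage with the method above (the keys used below mirror the dictionary it returns):

    from datetime import datetime, timedelta, UTC

    end = datetime.now(UTC)
    summary = cm.get_usage_summary(start_date=end - timedelta(days=30), end_date=end)
    print(summary["summary"]["total_cost"], len(summary["model_breakdown"]))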
596
+ def get_recent_chat_history(self, chat_id: int, limit: int = 5) -> List[Dict[str, Any]]:
597
+ """
598
+ Get recent message history for a chat, limited to the last 'limit' messages.
599
+
600
+ Args:
601
+ chat_id: ID of the chat to get history for
602
+ limit: Maximum number of recent messages to return
603
+
604
+ Returns:
605
+ List of dictionaries containing message information
606
+ """
607
+ session = self.Session()
608
+ try:
609
+ # Ensure safe limit for both databases
610
+ safe_limit = min(max(1, limit), 50) * 2 # Between 2 and 100 messages
611
+
612
+ # Fetch the most recent messages, then reverse into chronological order.
613
+ # This is portable across SQLite and PostgreSQL and avoids mapping entity
614
+ # rows through from_statement() on a subquery, which is fragile
615
+ recent_messages = session.query(Message).filter(
616
+ Message.chat_id == chat_id
617
+ ).order_by(Message.timestamp.desc()).limit(safe_limit).all()
618
+
619
+ messages = list(reversed(recent_messages))
621
+
622
+ return [
623
+ {
624
+ "message_id": msg.message_id,
625
+ "chat_id": msg.chat_id,
626
+ "content": msg.content,
627
+ "sender": msg.sender,
628
+ "timestamp": msg.timestamp.isoformat() if msg.timestamp else None
629
+ } for msg in messages
630
+ ]
631
+ except SQLAlchemyError as e:
632
+ logger.log_message(f"Error retrieving chat history: {str(e)}", level=logging.ERROR)
633
+ return []
634
+ finally:
635
+ session.close()
636
+
637
+
638
+ def extract_response_history(self, messages: List[Dict[str, Any]]) -> str:
639
+ """
640
+ Extract response history from message history.
641
+
642
+ Args:
643
+ messages: List of message dictionaries
644
+
645
+ Returns:
646
+ String containing combined response history in a structured format
647
+ """
648
+
649
+ summaries = []
650
+ user_messages = []
651
+
652
+ # Input validation
653
+ if not messages or not isinstance(messages, list):
654
+ return ""
655
+
656
+ try:
657
+ for msg in messages:
658
+ # Skip invalid messages
659
+ if not isinstance(msg, dict):
660
+ continue
661
+
662
+ # Get User Messages
663
+ if msg.get("sender") == "user":
664
+ user_messages.append(msg)
665
+ # Ensure content exists and is from AI before extracting summary
666
+ if msg.get("sender") == "ai" and "content" in msg and msg["content"]:
667
+ content = msg["content"]
668
+ # Extract summaries defensively; regex failures are caught and logged below
669
+ try:
670
+ matches = re.findall(r"### Summary\n(.*?)(?=\n\n##|\Z)", content, re.DOTALL)
671
+ summaries.extend(match.strip() for match in matches if match)
672
+ except Exception as e:
673
+ logger.log_message(f"Error extracting summaries: {str(e)}", level=logging.ERROR)
674
+
675
+ # Combine user messages with summaries in a structured format
676
+ combined_conversations = []
677
+ for i, user_msg in enumerate(user_messages):
678
+ if i < len(summaries):
679
+ # Ensure content exists and is not too long
680
+ user_content = user_msg.get('content', '')
681
+ if user_content and isinstance(user_content, str):
682
+ # Truncate if needed
683
+ if len(user_content) > 500:
684
+ user_content = user_content[:497] + "..."
685
+
686
+ summary = summaries[i]
687
+ if len(summary) > 500:
688
+ summary = summary[:497] + "..."
689
+
690
+ combined_conversations.append(f"Query: {user_content}\nSummary: {summary}")
691
+
692
+ # Return the last 3 conversations to maintain context
693
+ formatted_context = "\n\n".join(combined_conversations[-3:])
694
+
695
+ # Add a clear header to indicate this is past interaction history
696
+ if formatted_context:
697
+ return f"### Previous Interaction History:\n{formatted_context}"
698
+ return ""
699
+ except Exception as e:
700
+ logger.log_message(f"Error in extract_response_history: {str(e)}", level=logging.ERROR)
701
+ return ""
702
+
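get_recent_chat_history and extract_response_history are designed to be chained: the first returns the message dictionaries the second consumes. Continuing the earlier sketch:

    history = cm.get_recent_chat_history(chat_id=chat["chat_id"], limit=5)
    context = cm.extract_response_history(history)
    # context stays "" until at least one AI message contains a "### Summary" block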
703
+ def add_message_feedback(self, message_id: int, rating: int,
704
+ model_settings: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
705
+ """
706
+ Add or update feedback for a message.
707
+
708
+ Args:
709
+ message_id: ID of the message to add feedback for
710
+ rating: Star rating (1-5)
711
+ model_settings: Optional dictionary containing model settings (name, provider, temperature, etc.)
712
+
713
+ Returns:
714
+ Dictionary containing feedback information
715
+ """
716
+ session = self.Session()
717
+ try:
718
+ # Check if message exists
719
+ message = session.query(Message).filter(Message.message_id == message_id).first()
720
+ if not message:
721
+ raise ValueError(f"Message with ID {message_id} not found")
722
+
723
+ # Check if feedback already exists
724
+ existing_feedback = session.query(MessageFeedback).filter(
725
+ MessageFeedback.message_id == message_id
726
+ ).first()
727
+
728
+ now = datetime.now(UTC)
729
+
730
+ # Extract model settings
731
+ model_name = None
732
+ model_provider = None
733
+ temperature = None
734
+ max_tokens = None
735
+
736
+ if model_settings:
737
+ model_name = model_settings.get('model_name')
738
+ model_provider = model_settings.get('model_provider')
739
+ temperature = model_settings.get('temperature')
740
+ max_tokens = model_settings.get('max_tokens')
741
+
742
+ if existing_feedback:
743
+ # Update existing feedback
744
+ existing_feedback.rating = rating
745
+ existing_feedback.model_name = model_name
746
+ existing_feedback.model_provider = model_provider
747
+ existing_feedback.temperature = temperature
748
+ existing_feedback.max_tokens = max_tokens
749
+ existing_feedback.updated_at = now
750
+ feedback_record = existing_feedback
751
+ else:
752
+ # Create new feedback
753
+ feedback_record = MessageFeedback(
754
+ message_id=message_id,
755
+ rating=rating,
756
+ model_name=model_name,
757
+ model_provider=model_provider,
758
+ temperature=temperature,
759
+ max_tokens=max_tokens,
760
+ created_at=now,
761
+ updated_at=now
762
+ )
763
+ session.add(feedback_record)
764
+
765
+ session.commit()
766
+
767
+ return {
768
+ "feedback_id": feedback_record.feedback_id,
769
+ "message_id": feedback_record.message_id,
770
+ "rating": feedback_record.rating,
771
+ "model_name": feedback_record.model_name,
772
+ "model_provider": feedback_record.model_provider,
773
+ "temperature": feedback_record.temperature,
774
+ "max_tokens": feedback_record.max_tokens,
775
+ "created_at": feedback_record.created_at.isoformat(),
776
+ "updated_at": feedback_record.updated_at.isoformat()
777
+ }
778
+ except SQLAlchemyError as e:
779
+ session.rollback()
780
+ logger.log_message(f"Error adding feedback: {str(e)}", level=logging.ERROR)
781
+ raise
782
+ finally:
783
+ session.close()
784
+
785
+ def get_message_feedback(self, message_id: int) -> Optional[Dict[str, Any]]:
786
+ """
787
+ Get feedback for a specific message.
788
+
789
+ Args:
790
+ message_id: ID of the message to get feedback for
791
+
792
+ Returns:
793
+ Dictionary containing feedback information or None if no feedback exists
794
+ """
795
+ session = self.Session()
796
+ try:
797
+ feedback = session.query(MessageFeedback).filter(
798
+ MessageFeedback.message_id == message_id
799
+ ).first()
800
+
801
+ if not feedback:
802
+ return None
803
+
804
+ return {
805
+ "feedback_id": feedback.feedback_id,
806
+ "message_id": feedback.message_id,
807
+ "rating": feedback.rating,
808
+ "model_name": feedback.model_name,
809
+ "model_provider": feedback.model_provider,
810
+ "temperature": feedback.temperature,
811
+ "max_tokens": feedback.max_tokens,
812
+ "created_at": feedback.created_at.isoformat(),
813
+ "updated_at": feedback.updated_at.isoformat()
814
+ }
815
+ except SQLAlchemyError as e:
816
+ logger.log_message(f"Error getting feedback: {str(e)}", level=logging.ERROR)
817
+ raise
818
+ finally:
819
+ session.close()
820
+
821
+ def get_chat_feedback(self, chat_id: int) -> List[Dict[str, Any]]:
822
+ """
823
+ Get all feedback for messages in a specific chat.
824
+
825
+ Args:
826
+ chat_id: ID of the chat to get feedback for
827
+
828
+ Returns:
829
+ List of dictionaries containing feedback information
830
+ """
831
+ session = self.Session()
832
+ try:
833
+ feedback_records = session.query(MessageFeedback).join(
834
+ Message, Message.message_id == MessageFeedback.message_id
835
+ ).filter(
836
+ Message.chat_id == chat_id
837
+ ).all()
838
+
839
+ return [{
840
+ "feedback_id": feedback.feedback_id,
841
+ "message_id": feedback.message_id,
842
+ "rating": feedback.rating,
843
+ "model_name": feedback.model_name,
844
+ "model_provider": feedback.model_provider,
845
+ "temperature": feedback.temperature,
846
+ "max_tokens": feedback.max_tokens,
847
+ "created_at": feedback.created_at.isoformat(),
848
+ "updated_at": feedback.updated_at.isoformat()
849
+ } for feedback in feedback_records]
850
+ except SQLAlchemyError as e:
851
+ logger.log_message(f"Error getting chat feedback: {str(e)}", level=logging.ERROR)
852
+ raise
853
+ finally:
854
+ session.close()
855
+
856
+ def get_feedback_statistics(self, user_id: Optional[int] = None,
857
+ start_date: Optional[datetime] = None,
858
+ end_date: Optional[datetime] = None) -> Dict[str, Any]:
859
+ """
860
+ Get feedback statistics for analysis.
861
+
862
+ Args:
863
+ user_id: Optional user ID to filter by
864
+ start_date: Optional start date to filter by
865
+ end_date: Optional end date to filter by
866
+
867
+ Returns:
868
+ Dictionary containing feedback statistics
869
+ """
870
+ session = self.Session()
871
+ try:
872
+ # Base query for all feedback
873
+ query = session.query(MessageFeedback).join(
874
+ Message, Message.message_id == MessageFeedback.message_id
875
+ )
876
+
877
+ # Apply filters if provided
878
+ if user_id is not None:
879
+ query = query.join(Chat, Chat.chat_id == Message.chat_id).filter(
880
+ Chat.user_id == user_id
881
+ )
882
+
883
+ if start_date is not None:
884
+ query = query.filter(MessageFeedback.created_at >= start_date)
885
+
886
+ if end_date is not None:
887
+ query = query.filter(MessageFeedback.created_at <= end_date)
888
+
889
+ # Get all feedback records
890
+ feedback_records = query.all()
891
+
892
+ # Calculate statistics
893
+ if not feedback_records:
894
+ return {
895
+ "total_feedback_count": 0,
896
+ "average_rating": 0,
897
+ "rating_distribution": {
898
+ "1": 0, "2": 0, "3": 0, "4": 0, "5": 0
899
+ },
900
+ "model_ratings": {}
901
+ }
902
+
903
+ # Calculate average rating
904
+ ratings = [record.rating for record in feedback_records if record.rating is not None]
905
+ average_rating = sum(ratings) / len(ratings) if ratings else 0
906
+
907
+ # Calculate rating distribution
908
+ rating_distribution = {
909
+ "1": 0, "2": 0, "3": 0, "4": 0, "5": 0
910
+ }
911
+
912
+ for record in feedback_records:
913
+ if record.rating is not None:
914
+ rating_distribution[str(record.rating)] += 1
915
+
916
+ # Calculate ratings by model
917
+ model_ratings = {}
918
+ for record in feedback_records:
919
+ if record.model_name and record.rating is not None:
920
+ if record.model_name not in model_ratings:
921
+ model_ratings[record.model_name] = {
922
+ "count": 0,
923
+ "total": 0,
924
+ "average": 0
925
+ }
926
+
927
+ model_ratings[record.model_name]["count"] += 1
928
+ model_ratings[record.model_name]["total"] += record.rating
929
+
930
+ # Calculate average for each model
931
+ for model_name, data in model_ratings.items():
932
+ data["average"] = data["total"] / data["count"] if data["count"] > 0 else 0
933
+
934
+ return {
935
+ "total_feedback_count": len(feedback_records),
936
+ "average_rating": average_rating,
937
+ "rating_distribution": rating_distribution,
938
+ "model_ratings": model_ratings
939
+ }
940
+ except SQLAlchemyError as e:
941
+ logger.log_message(f"Error getting feedback statistics: {str(e)}", level=logging.ERROR)
942
+ raise
943
+ finally:
944
+ session.close()
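Closing the loop on the feedback tables, a hedged sketch of rating a message and reading the aggregate statistics, continuing the earlier sketch (the model settings mirror the .env template defaults):

    cm.add_message_feedback(
        message_id=reply["message_id"],
        rating=5,
        model_settings={"model_name": "gpt-4o-mini", "model_provider": "openai",
                        "temperature": 0.7, "max_tokens": 6000},
    )
    stats = cm.get_feedback_statistics()
    print(stats["average_rating"], stats["rating_distribution"])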
src/managers/session_manager.py ADDED
@@ -0,0 +1,437 @@
1
+ import io
2
+ import os
3
+ import time
4
+ import uuid
5
+ import logging
6
+ import pandas as pd
7
+ from typing import Dict, Any, List
8
+
9
+ from llama_index.core import Document, VectorStoreIndex
10
+ from src.utils.logger import Logger
11
+ from src.managers.user_manager import get_current_user
12
+ from src.agents.agents import auto_analyst
13
+ from src.agents.retrievers.retrievers import make_data
14
+ from src.managers.chat_manager import ChatManager
15
+ from dotenv import load_dotenv
16
+
17
+ load_dotenv()
18
+
19
+ # Initialize logger
20
+ logger = Logger("session_manager", see_time=False, console_log=False)
21
+
22
+ class SessionManager:
23
+ """
24
+ Manages session-specific state, including datasets, retrievers, and AI systems.
25
+ Handles creation, retrieval, and updating of sessions.
26
+ """
27
+
28
+ def __init__(self, styling_instructions: List[str], available_agents: Dict):
29
+ """
30
+ Initialize SessionManager with styling instructions and available agents
31
+
32
+ Args:
33
+ styling_instructions: List of styling instructions for visualization
34
+ available_agents: Dictionary of available agents (deprecated - agents now loaded from DB)
35
+ """
36
+ self.styling_instructions = styling_instructions
37
+ self._sessions = {}
38
+ self._default_df = None
39
+ self._default_retrievers = None
40
+ self._default_ai_system = None
41
+ self._make_data = None
42
+ # Default dataset metadata (the full description is assigned below)
43
+ self._default_name = "Housing.csv"
45
+
46
+ self._dataset_description = """This dataset contains residential property information with details about pricing, physical characteristics, and amenities. The data can be used for real estate market analysis, property valuation, and understanding the relationship between house features and prices.
47
+
48
+ Key Features:
49
+ - Property prices range from 1.75M to 13.3M (currency units)
50
+ - Living areas from 1,650 to 16,200 (square units)
51
+ - Properties vary from 1-6 bedrooms and 1-4 bathrooms
52
+ - Various amenities tracked including parking, air conditioning, and hot water heating
53
+
54
+ TECHNICAL CONSIDERATIONS FOR ANALYSIS:
55
+
56
+ Numeric Columns:
57
+ - price (int): Large values suggesting currency units; range 1.75M-13.3M
58
+ - area (int): Square units measurement; range 1,650-16,200
59
+ - bedrooms (int): Discrete values 1-6
60
+ - bathrooms (int): Discrete values 1-4
61
+ - stories (int): Discrete values 1-4
62
+ - parking (int): Discrete values 0-3
63
+
64
+ Binary Categorical Columns (stored as str):
65
+ - mainroad (str): 'yes'/'no' - Consider boolean conversion
66
+ - guestroom (str): 'yes'/'no' - Consider boolean conversion
67
+ - basement (str): 'yes'/'no' - Consider boolean conversion
68
+ - hotwaterheating (str): 'yes'/'no' - Consider boolean conversion
69
+ - airconditioning (str): 'yes'/'no' - Consider boolean conversion
70
+ - prefarea (str): 'yes'/'no' - Consider boolean conversion
71
+
72
+ Other Categorical:
73
+ - furnishingstatus (str): Categories include 'furnished', 'semi-furnished' - Consider one-hot encoding
74
+
75
+ Data Handling Recommendations:
76
+ 1. Binary variables should be converted to boolean or numeric (0/1) for analysis
77
+ 2. Consider normalizing price and area values for certain analyses
78
+ 3. Furnishing status will need categorical encoding for numerical analysis
79
+ 4. No null values detected in the dataset
80
+ 5. All numeric columns are properly typed as numbers (no string conversion needed)
81
+ 6. Consider treating bedrooms, bathrooms, stories, and parking as categorical despite numeric storage
82
+
83
+ This dataset appears clean with consistent formatting and no missing values, making it suitable for immediate analysis with appropriate categorical encoding.
84
+ """
86
+ self.available_agents = available_agents
87
+ self.chat_manager = ChatManager(db_url=os.getenv("DATABASE_URL"))
88
+
89
+ self.initialize_default_dataset()
90
+
91
+ def initialize_default_dataset(self):
92
+ """Initialize the default dataset and store it"""
93
+ try:
94
+ self._default_df = pd.read_csv("Housing.csv")
95
+ self._make_data = make_data(self._default_df, self._dataset_description)
96
+ self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
97
+ # Create default AI system - agents will be loaded from database
98
+ self._default_ai_system = auto_analyst(agents=[], retrievers=self._default_retrievers)
99
+ except Exception as e:
100
+ logger.log_message(f"Error initializing default dataset: {str(e)}", level=logging.ERROR)
101
+ raise e
102
+
103
+ def initialize_retrievers(self, styling_instructions: List[str], doc: List[str]):
104
+ """
105
+ Initialize retrievers for styling and data
106
+
107
+ Args:
108
+ styling_instructions: List of styling instructions
109
+ doc: List of document strings
110
+
111
+ Returns:
112
+ Dictionary containing style_index and dataframe_index
113
+ """
114
+ try:
115
+ style_index = VectorStoreIndex.from_documents([Document(text=x) for x in styling_instructions])
116
+ data_index = VectorStoreIndex.from_documents([Document(text=x) for x in doc])
117
+ return {"style_index": style_index, "dataframe_index": data_index}
118
+ except Exception as e:
119
+ logger.log_message(f"Error initializing retrievers: {str(e)}", level=logging.ERROR)
120
+ raise e
121
+
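initialize_retrievers returns two VectorStoreIndex objects keyed by role. A hedged sketch of consuming them through the standard llama_index retriever interface, assuming a constructed `session_manager` and its styling instructions are in scope:

    retrievers = session_manager.initialize_retrievers(
        styling_instructions, [str(session_manager._make_data)]
    )

    style_retriever = retrievers["style_index"].as_retriever(similarity_top_k=2)
    hits = style_retriever.retrieve("styling for a scatter plot of price vs area")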
122
+ def get_session_state(self, session_id: str) -> Dict[str, Any]:
123
+ """
124
+ Get or create session-specific state
125
+
126
+ Args:
127
+ session_id: The session identifier
128
+
129
+ Returns:
130
+ Dictionary containing session state
131
+ """
132
+ # Use the global model config from app_state when available
133
+ # Get the most up-to-date model config
134
+ if hasattr(self, '_app_model_config') and self._app_model_config:
135
+ default_model_config = self._app_model_config
136
+ else:
137
+ default_model_config = {
138
+ "provider": os.getenv("MODEL_PROVIDER", "openai"),
139
+ "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
140
+ "api_key": os.getenv("OPENAI_API_KEY"),
141
+ "temperature": float(os.getenv("TEMPERATURE", 1.0)),
142
+ "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
143
+ }
144
+
145
+ if session_id not in self._sessions:
146
+ # Check if we need to create a brand new session
147
+ logger.log_message(f"Creating new session state for session_id: {session_id}", level=logging.INFO)
148
+
149
+ # Initialize with default state
150
+ self._sessions[session_id] = {
151
+ "current_df": self._default_df.copy() if self._default_df is not None else None,
152
+ "retrievers": self._default_retrievers,
153
+ "ai_system": self._default_ai_system,
154
+ "make_data": self._make_data,
155
+ "description": self._dataset_description,
156
+ "name": self._default_name,
157
+ "model_config": default_model_config,
158
+ "creation_time": time.time()
159
+ }
160
+ else:
161
+ # Verify dataset integrity in existing session
162
+ session = self._sessions[session_id]
163
+
164
+ # Always update model_config to match global settings
165
+ session["model_config"] = default_model_config
166
+
167
+ # If dataset is somehow missing, restore it
168
+ if "current_df" not in session or session["current_df"] is None:
169
+ logger.log_message(f"Restoring missing dataset for session {session_id}", level=logging.WARNING)
170
+ session["current_df"] = self._default_df.copy() if self._default_df is not None else None
171
+ session["retrievers"] = self._default_retrievers
172
+ session["ai_system"] = self._default_ai_system
173
+ session["description"] = self._dataset_description
174
+ session["name"] = self._default_name
175
+
176
+ # Ensure we have the basic required fields
177
+ if "name" not in session:
178
+ session["name"] = self._default_name
179
+ if "description" not in session:
180
+ session["description"] = self._dataset_description
181
+
182
+ # Update last accessed time
183
+ session["last_accessed"] = time.time()
184
+
185
+ return self._sessions[session_id]
186
+
187
+ def clear_session_state(self, session_id: str):
188
+ """
189
+ Clear session-specific state
190
+
191
+ Args:
192
+ session_id: The session identifier
193
+ """
194
+ if session_id in self._sessions:
195
+ del self._sessions[session_id]
196
+
197
+
198
+ def update_session_dataset(self, session_id: str, df, name: str, desc: str):
199
+ """
200
+ Update dataset for a specific session
201
+
202
+ Args:
203
+ session_id: The session identifier
204
+ df: Pandas DataFrame containing the dataset
205
+ name: Name of the dataset
206
+ desc: Description of the dataset
207
+ """
208
+ try:
209
+ self._make_data = make_data(df, desc)
210
+ retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
211
+
212
+ # Check if session has a user_id to create user-specific AI system
213
+ current_user_id = None
214
+ if session_id in self._sessions and "user_id" in self._sessions[session_id]:
215
+ current_user_id = self._sessions[session_id]["user_id"]
216
+
217
+ ai_system = self.create_ai_system_for_user(retrievers, current_user_id)
218
+
219
+ # Get default model config for new sessions
220
+ default_model_config = {
221
+ "provider": os.getenv("MODEL_PROVIDER", "openai"),
222
+ "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
223
+ "api_key": os.getenv("OPENAI_API_KEY"),
224
+ "temperature": float(os.getenv("TEMPERATURE", 1.0)),
225
+ "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
226
+ }
227
+
228
+ # Create a completely fresh session state for the new dataset
229
+ # This ensures no remnants of the previous dataset remain
230
+ session_state = {
231
+ "current_df": df,
232
+ "retrievers": retrievers,
233
+ "ai_system": ai_system,
234
+ "make_data": self._make_data,
235
+ "description": desc,
236
+ "name": name,
237
+ "model_config": default_model_config, # Initialize with default
238
+ }
239
+
240
+ # Preserve user_id, chat_id, and model_config if they exist in the current session
241
+ if session_id in self._sessions:
242
+ if "user_id" in self._sessions[session_id]:
243
+ session_state["user_id"] = self._sessions[session_id]["user_id"]
244
+ if "chat_id" in self._sessions[session_id]:
245
+ session_state["chat_id"] = self._sessions[session_id]["chat_id"]
246
+ if "model_config" in self._sessions[session_id]:
247
+ # Preserve the user's model configuration
248
+ session_state["model_config"] = self._sessions[session_id]["model_config"]
249
+
250
+ # Replace the entire session with the new state
251
+ self._sessions[session_id] = session_state
252
+
253
+ logger.log_message(f"Updated session {session_id} with completely fresh dataset state: {name}", level=logging.INFO)
254
+ except Exception as e:
255
+ logger.log_message(f"Error updating dataset for session {session_id}: {str(e)}", level=logging.ERROR)
256
+ raise e
257
+
258
+ def reset_session_to_default(self, session_id: str):
259
+ """
260
+ Reset a session to use the default dataset
261
+
262
+ Args:
263
+ session_id: The session identifier
264
+ """
265
+ try:
266
+ # Get default model config from environment
267
+ default_model_config = {
268
+ "provider": os.getenv("MODEL_PROVIDER", "openai"),
269
+ "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
270
+ "api_key": os.getenv("OPENAI_API_KEY"),
271
+ "temperature": float(os.getenv("TEMPERATURE", 1.0)),
272
+ "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
273
+ }
274
+
275
+ # Clear any custom data associated with the session first
276
+ if session_id in self._sessions:
277
+ del self._sessions[session_id]
278
+ logger.log_message(f"Cleared existing state for session {session_id} before reset.", level=logging.INFO)
279
+
280
+ # Initialize with default state
281
+ self._sessions[session_id] = {
282
+ "current_df": self._default_df.copy(), # Use a copy
283
+ "retrievers": self._default_retrievers,
284
+ "ai_system": self._default_ai_system,
285
+ "description": self._dataset_description,
286
+ "name": self._default_name, # Explicitly set the default name
287
+ "make_data": None, # Clear any custom make_data
288
+ "model_config": default_model_config # Initialize with default model config
289
+ }
290
+ logger.log_message(f"Reset session {session_id} to default dataset: {self._default_name}", level=logging.INFO)
291
+ except Exception as e:
292
+ logger.log_message(f"Error resetting session {session_id}: {str(e)}", level=logging.ERROR)
293
+ raise e
294
+
295
+     def create_ai_system_for_user(self, retrievers, user_id=None):
+         """
+         Create an AI system with user-specific agents (including custom agents)
+
+         Args:
+             retrievers: The retrievers for the AI system
+             user_id: Optional user ID to load custom agents for
+
+         Returns:
+             An auto_analyst instance with all available agents (standard + custom)
+         """
+         try:
+             if user_id:
+                 # Import here to avoid circular imports
+                 from src.db.init_db import session_factory
+
+                 # Create a database session
+                 db_session = session_factory()
+                 try:
+                     # Create AI system with user context to load custom agents
+                     ai_system = auto_analyst(
+                         agents=[],
+                         retrievers=retrievers,
+                         user_id=user_id,
+                         db_session=db_session
+                     )
+                     logger.log_message(f"Created AI system for user {user_id}", level=logging.INFO)
+                     return ai_system
+                 finally:
+                     db_session.close()
+             else:
+                 # Create standard AI system without custom agents
+                 return auto_analyst(agents=[], retrievers=retrievers)
+
+         except Exception as e:
+             logger.log_message(f"Error creating AI system for user {user_id}: {str(e)}", level=logging.ERROR)
+             # Fall back to the standard AI system rather than failing the caller
+             return auto_analyst(agents=[], retrievers=retrievers)
+
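+     # Design note: the broad except/fallback above trades completeness for
+     # availability. If custom-agent loading fails (database unreachable, bad
+     # agent definition), the caller still receives a working system with the
+     # standard agents. A call sketch, assuming session-scoped retrievers:
+     #
+     #   ai = manager.create_ai_system_for_user(state["retrievers"], user_id=42)
+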
+     def set_session_user(self, session_id: str, user_id: int, chat_id: int = None):
+         """
+         Associate a user with a session
+
+         Args:
+             session_id: The session identifier
+             user_id: The authenticated user ID
+             chat_id: Optional chat ID for tracking the conversation
+
+         Returns:
+             Updated session state dictionary
+         """
+         # Ensure we have a session state for this session ID
+         if session_id not in self._sessions:
+             self.get_session_state(session_id)  # Initialize with defaults
+
+         # Use the provided chat ID, reuse an existing one, or generate a new one
+         self._sessions[session_id]["user_id"] = user_id
+
+         if chat_id:
+             chat_id_to_use = chat_id
+         else:
+             if "chat_id" not in self._sessions[session_id] or not self._sessions[session_id]["chat_id"]:
+                 # Combine the current millisecond timestamp with a random offset
+                 # to produce a short, readable ID
+                 import random
+                 chat_id_to_use = int(time.time() * 1000) % 1000000 + random.randint(1, 999)
+             else:
+                 chat_id_to_use = self._sessions[session_id]["chat_id"]
+
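+         # Worked example of the ID arithmetic above: with time.time() == 1700000000.5,
+         # int(time.time() * 1000) == 1700000000500, and 1700000000500 % 1000000 == 500,
+         # i.e. the last six digits of the millisecond timestamp. Adding randint(1, 999)
+         # yields a chat ID between 501 and 1499 for that instant, so IDs stay short
+         # but are not guaranteed unique.
+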
+         # Store chat ID
+         self._sessions[session_id]["chat_id"] = chat_id_to_use
+
+         # Recreate AI system with user context to load custom agents
+         try:
+             session_retrievers = self._sessions[session_id]["retrievers"]
+             user_ai_system = self.create_ai_system_for_user(session_retrievers, user_id)
+             self._sessions[session_id]["ai_system"] = user_ai_system
+             logger.log_message(f"Updated AI system for session {session_id} with user {user_id}", level=logging.INFO)
+         except Exception as e:
+             logger.log_message(f"Error updating AI system for user {user_id}: {str(e)}", level=logging.ERROR)
+             # Continue with the existing AI system if the update fails
+
+         # Record the association in the logs
+         logger.log_message(f"Associated session {session_id} with user_id={user_id}, chat_id={chat_id_to_use}", level=logging.INFO)
+
+         # Return the updated session data
+         return self._sessions[session_id]
+
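+ # Usage sketch (illustrative): associating a user upgrades the session's AI
+ # system in place, so custom agents become available mid-session.
+ #
+ #   state = manager.set_session_user("sess-1", user_id=42)            # chat_id generated
+ #   state = manager.set_session_user("sess-1", user_id=42, chat_id=7) # explicit chat_id
+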
+ async def get_session_id(request, session_manager):
+     """
+     Get the session ID from the request, associating a user with it when one can be identified
+
+     Args:
+         request: FastAPI Request object
+         session_manager: SessionManager instance
+
+     Returns:
+         Session ID string
+     """
+     # First try to get the session ID from query params
+     session_id = request.query_params.get("session_id")
+
+     # If not in query params, try the headers
+     if not session_id:
+         session_id = request.headers.get("X-Session-ID")
+
+     # If still not found, generate a new one
+     if not session_id:
+         session_id = str(uuid.uuid4())
+
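+     # Clients can pin a session explicitly; both forms below are honored, with
+     # the query parameter checked first (request shapes are illustrative; real
+     # endpoint paths depend on the route definitions in app.py):
+     #
+     #   GET /chat?session_id=sess-1
+     #   GET /chat  with header  X-Session-ID: sess-1
+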
+     # Get or create the session state
+     session_state = session_manager.get_session_state(session_id)
+
+     # First, check if we already have a user associated with this session
+     if session_state.get("user_id") is not None:
+         return session_id
+
+     # Next, try to get an authenticated user using the API key
+     current_user = await get_current_user(request)
+     if current_user:
+         # Use the authenticated user instead of creating a guest
+         session_manager.set_session_user(
+             session_id=session_id,
+             user_id=current_user.user_id
+         )
+         logger.log_message(f"Associated session {session_id} with authenticated user_id {current_user.user_id}", level=logging.INFO)
+         return session_id
+
+     # Check if a user_id was provided in the request params
+     user_id_param = request.query_params.get("user_id")
+     if user_id_param:
+         try:
+             user_id = int(user_id_param)
+             session_manager.set_session_user(session_id=session_id, user_id=user_id)
+             logger.log_message(f"Associated session {session_id} with provided user_id {user_id}", level=logging.INFO)
+             return session_id
+         except (ValueError, TypeError):
+             logger.log_message(f"Invalid user_id in query params: {user_id_param}", level=logging.WARNING)
+
+     # No user was found or created; return the session ID without a user association
+     logger.log_message(f"No authenticated user found for session {session_id}, continuing without user association", level=logging.INFO)
+     return session_id
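+
+ # Usage sketch (illustrative; `app` and `session_manager` are assumed to be
+ # created in the importing module, e.g. app.py):
+ #
+ #   from fastapi import Request
+ #
+ #   @app.get("/session-info")
+ #   async def session_info(request: Request):
+ #       session_id = await get_session_id(request, session_manager)
+ #       return {"session_id": session_id}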