Spaces: Running on CPU Upgrade
Update src/managers/session_manager.py
Browse files
- src/managers/session_manager.py (+46 -30)
src/managers/session_manager.py
CHANGED
@@ -12,6 +12,9 @@ from src.managers.user_manager import create_user, get_current_user
|
|
12 |
from src.agents.agents import auto_analyst, auto_analyst_ind
|
13 |
from src.agents.retrievers.retrievers import make_data
|
14 |
from src.managers.chat_manager import ChatManager
|
|
|
|
|
|
|
15 |
|
16 |
# Initialize logger
|
17 |
logger = Logger("session_manager", see_time=False, console_log=False)
|
@@ -38,35 +41,48 @@ class SessionManager:
|
|
38 |
self._make_data = None
|
39 |
self._default_name = "Housing Dataset" # Default dataset name
|
40 |
|
41 |
-
self._dataset_description = """This
|
42 |
-
|
43 |
-
1. **price**: The sale price of the property (numeric).
|
44 |
-
2. **area**: The total area of the property in square feet (numeric).
|
45 |
-
3. **bedrooms**: The number of bedrooms in the property (integer).
|
46 |
-
4. **bathrooms**: The number of bathrooms in the property (integer).
|
47 |
-
5. **stories**: The number of stories in the house (integer).
|
48 |
-
6. **mainroad**: Indicates if the property is located on a main road ('yes' or 'no').
|
49 |
-
7. **guestroom**: Indicates the presence of a guest room ('yes' or 'no').
|
50 |
-
8. **basement**: Indicates if there is a basement ('yes' or 'no').
|
51 |
-
9. **hotwaterheating**: Indicates if the property has hot water heating ('yes' or 'no').
|
52 |
-
10. **airconditioning**: Indicates if the property has air conditioning ('yes' or 'no').
|
53 |
-
11. **parking**: The number of parking spots available (integer).
|
54 |
-
12. **prefarea**: Indicates if the property is located in a preferred area ('yes' or 'no').
|
55 |
-
13. **furnishingstatus**: The condition of the property in terms of furnishings (categorical).
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
58 |
|
59 |
TECHNICAL CONSIDERATIONS FOR ANALYSIS:
|
60 |
-
- The **price** and **area** columns are numeric but may need validation to ensure all entries are numeric values and free from formatting issues.
|
61 |
-
- The **mainroad**, **guestroom**, **basement**, **hotwaterheating**, **airconditioning**, **prefarea**, and **furnishingstatus** columns contain categorical data; ensure to encode these variables for any machine learning model or analysis.
|
62 |
-
- The **bedrooms**, **bathrooms**, **stories**, and **parking** columns are stored as integers; confirm that there are no erroneous string entries that could cause processing errors.
|
63 |
-
- Empty or null values should be handled appropriately, with a review of their occurrence to decide on imputation or deletion strategies.
|
64 |
-
- It is advisable to check for any outlier values, especially in the **price** and **area** columns, which may skew analysis results.
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
self.styling_instructions = styling_instructions
|
68 |
self.available_agents = available_agents
|
69 |
-
self.chat_manager = ChatManager(db_url=
|
70 |
|
71 |
self.initialize_default_dataset()
|
72 |
|
@@ -74,8 +90,8 @@ The refined description aims to equip both users and analysis agents with essent
|
|
74 |
"""Initialize the default dataset and store it"""
|
75 |
try:
|
76 |
self._default_df = pd.read_csv("Housing.csv")
|
77 |
-
|
78 |
-
self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(
|
79 |
self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
|
80 |
retrievers=self._default_retrievers)
|
81 |
except Exception as e:
|
@@ -119,7 +135,7 @@ The refined description aims to equip both users and analysis agents with essent
|
|
119 |
default_model_config = {
|
120 |
"provider": os.getenv("MODEL_PROVIDER", "openai"),
|
121 |
"model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
|
122 |
-
|
123 |
"temperature": float(os.getenv("TEMPERATURE", 1.0)),
|
124 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
125 |
}
|
@@ -188,8 +204,8 @@ The refined description aims to equip both users and analysis agents with essent
|
|
188 |
desc: Description of the dataset
|
189 |
"""
|
190 |
try:
|
191 |
-
|
192 |
-
retrievers = self.initialize_retrievers(self.styling_instructions, [str(
|
193 |
ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
|
194 |
|
195 |
# Get default model config for new sessions
|
@@ -207,7 +223,7 @@ The refined description aims to equip both users and analysis agents with essent
|
|
207 |
"current_df": df,
|
208 |
"retrievers": retrievers,
|
209 |
"ai_system": ai_system,
|
210 |
-
"make_data":
|
211 |
"description": desc,
|
212 |
"name": name,
|
213 |
"model_config": default_model_config, # Initialize with default
|
@@ -380,4 +396,4 @@ async def get_session_id(request, session_manager):
|
|
380 |
except Exception as e:
|
381 |
logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
|
382 |
|
383 |
-
return session_id
|
|
|
12 |
from src.agents.agents import auto_analyst, auto_analyst_ind
|
13 |
from src.agents.retrievers.retrievers import make_data
|
14 |
from src.managers.chat_manager import ChatManager
|
15 |
+
from dotenv import load_dotenv
|
16 |
+
|
17 |
+
load_dotenv()
|
18 |
|
19 |
# Initialize logger
|
20 |
logger = Logger("session_manager", see_time=False, console_log=False)
|
|
|
41 |
self._make_data = None
|
42 |
self._default_name = "Housing Dataset" # Default dataset name
|
43 |
|
44 |
+
self._dataset_description = """This dataset contains residential property information with details about pricing, physical characteristics, and amenities. The data can be used for real estate market analysis, property valuation, and understanding the relationship between house features and prices.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
Key Features:
|
47 |
+
- Property prices range from 1.75M to 13.3M (currency units)
|
48 |
+
- Living areas from 1,650 to 16,200 (square units)
|
49 |
+
- Properties vary from 1-6 bedrooms and 1-4 bathrooms
|
50 |
+
- Various amenities tracked including parking, air conditioning, and hot water heating
|
51 |
|
52 |
TECHNICAL CONSIDERATIONS FOR ANALYSIS:
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
+
Numeric Columns:
|
55 |
+
- price (int): Large values suggesting currency units; range 1.75M-13.3M
|
56 |
+
- area (int): Square units measurement; range 1,650-16,200
|
57 |
+
- bedrooms (int): Discrete values 1-6
|
58 |
+
- bathrooms (int): Discrete values 1-4
|
59 |
+
- stories (int): Discrete values 1-4
|
60 |
+
- parking (int): Discrete values 0-3
|
61 |
+
|
62 |
+
Binary Categorical Columns (stored as str):
|
63 |
+
- mainroad (str): 'yes'/'no' - Consider boolean conversion
|
64 |
+
- guestroom (str): 'yes'/'no' - Consider boolean conversion
|
65 |
+
- basement (str): 'yes'/'no' - Consider boolean conversion
|
66 |
+
- hotwaterheating (str): 'yes'/'no' - Consider boolean conversion
|
67 |
+
- airconditioning (str): 'yes'/'no' - Consider boolean conversion
|
68 |
+
- prefarea (str): 'yes'/'no' - Consider boolean conversion
|
69 |
+
|
70 |
+
Other Categorical:
|
71 |
+
- furnishingstatus (str): Categories include 'furnished', 'semi-furnished' - Consider one-hot encoding
|
72 |
+
|
73 |
+
Data Handling Recommendations:
|
74 |
+
1. Binary variables should be converted to boolean or numeric (0/1) for analysis
|
75 |
+
2. Consider normalizing price and area values for certain analyses
|
76 |
+
3. Furnishing status will need categorical encoding for numerical analysis
|
77 |
+
4. No null values detected in the dataset
|
78 |
+
5. All numeric columns are properly typed as numbers (no string conversion needed)
|
79 |
+
6. Consider treating bedrooms, bathrooms, stories, and parking as categorical despite numeric storage
|
80 |
+
|
81 |
+
This dataset appears clean with consistent formatting and no missing values, making it suitable for immediate analysis with appropriate categorical encoding.
|
82 |
+
"""
|
83 |
self.styling_instructions = styling_instructions
|
84 |
self.available_agents = available_agents
|
85 |
+
self.chat_manager = ChatManager(db_url=os.getenv("DATABASE_URL"))
|
86 |
|
87 |
self.initialize_default_dataset()
|
88 |
|
|
|
90 |
"""Initialize the default dataset and store it"""
|
91 |
try:
|
92 |
self._default_df = pd.read_csv("Housing.csv")
|
93 |
+
self._make_data = make_data(self._default_df, self._dataset_description)
|
94 |
+
self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
|
95 |
self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
|
96 |
retrievers=self._default_retrievers)
|
97 |
except Exception as e:
|
|
|
135 |
default_model_config = {
|
136 |
"provider": os.getenv("MODEL_PROVIDER", "openai"),
|
137 |
"model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
|
138 |
+
"api_key": os.getenv("OPENAI_API_KEY"),
|
139 |
"temperature": float(os.getenv("TEMPERATURE", 1.0)),
|
140 |
"max_tokens": int(os.getenv("MAX_TOKENS", 6000))
|
141 |
}
|
|
|
204 |
desc: Description of the dataset
|
205 |
"""
|
206 |
try:
|
207 |
+
self._make_data = make_data(df, desc)
|
208 |
+
retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
|
209 |
ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
|
210 |
|
211 |
# Get default model config for new sessions
|
|
|
223 |
"current_df": df,
|
224 |
"retrievers": retrievers,
|
225 |
"ai_system": ai_system,
|
226 |
+
"make_data": self._make_data,
|
227 |
"description": desc,
|
228 |
"name": name,
|
229 |
"model_config": default_model_config, # Initialize with default
|
|
|
396 |
except Exception as e:
|
397 |
logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
|
398 |
|
399 |
+
return session_id
|