FireBird-Tech committed on
Commit
e9a833a
·
verified ·
1 Parent(s): 7b969cb

Update src/managers/session_manager.py

Browse files
Files changed (1) hide show
  1. src/managers/session_manager.py +46 -30
src/managers/session_manager.py CHANGED
@@ -12,6 +12,9 @@ from src.managers.user_manager import create_user, get_current_user
12
  from src.agents.agents import auto_analyst, auto_analyst_ind
13
  from src.agents.retrievers.retrievers import make_data
14
  from src.managers.chat_manager import ChatManager
 
 
 
15
 
16
  # Initialize logger
17
  logger = Logger("session_manager", see_time=False, console_log=False)
@@ -38,35 +41,48 @@ class SessionManager:
38
  self._make_data = None
39
  self._default_name = "Housing Dataset" # Default dataset name
40
 
41
- self._dataset_description = """This real estate dataset comprises 545 entries of residential properties, featuring essential characteristics that facilitate market analysis, price estimation, and buyer preferences. The dataset includes the following columns:
42
-
43
- 1. **price**: The sale price of the property (numeric).
44
- 2. **area**: The total area of the property in square feet (numeric).
45
- 3. **bedrooms**: The number of bedrooms in the property (integer).
46
- 4. **bathrooms**: The number of bathrooms in the property (integer).
47
- 5. **stories**: The number of stories in the house (integer).
48
- 6. **mainroad**: Indicates if the property is located on a main road ('yes' or 'no').
49
- 7. **guestroom**: Indicates the presence of a guest room ('yes' or 'no').
50
- 8. **basement**: Indicates if there is a basement ('yes' or 'no').
51
- 9. **hotwaterheating**: Indicates if the property has hot water heating ('yes' or 'no').
52
- 10. **airconditioning**: Indicates if the property has air conditioning ('yes' or 'no').
53
- 11. **parking**: The number of parking spots available (integer).
54
- 12. **prefarea**: Indicates if the property is located in a preferred area ('yes' or 'no').
55
- 13. **furnishingstatus**: The condition of the property in terms of furnishings (categorical).
56
 
57
- The dataset serves as a valuable resource for analyzing real estate trends, identifying factors influencing property pricing, and understanding buyer requirements across different residential layouts.
 
 
 
 
58
 
59
  TECHNICAL CONSIDERATIONS FOR ANALYSIS:
60
- - The **price** and **area** columns are numeric but may need validation to ensure all entries are numeric values and free from formatting issues.
61
- - The **mainroad**, **guestroom**, **basement**, **hotwaterheating**, **airconditioning**, **prefarea**, and **furnishingstatus** columns contain categorical data; ensure to encode these variables for any machine learning model or analysis.
62
- - The **bedrooms**, **bathrooms**, **stories**, and **parking** columns are stored as integers; confirm that there are no erroneous string entries that could cause processing errors.
63
- - Empty or null values should be handled appropriately, with a review of their occurrence to decide on imputation or deletion strategies.
64
- - It is advisable to check for any outlier values, especially in the **price** and **area** columns, which may skew analysis results.
65
 
66
- The refined description aims to equip both users and analysis agents with essential contextual information about the dataset as well as actionable insights for conducting robust analyses."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  self.styling_instructions = styling_instructions
68
  self.available_agents = available_agents
69
- self.chat_manager = ChatManager(db_url='sqlite:///chat_database.db')
70
 
71
  self.initialize_default_dataset()
72
 
@@ -74,8 +90,8 @@ The refined description aims to equip both users and analysis agents with essent
74
  """Initialize the default dataset and store it"""
75
  try:
76
  self._default_df = pd.read_csv("Housing.csv")
77
- data_dict = make_data(self._default_df, self._dataset_description)
78
- self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(data_dict)])
79
  self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
80
  retrievers=self._default_retrievers)
81
  except Exception as e:
@@ -119,7 +135,7 @@ The refined description aims to equip both users and analysis agents with essent
119
  default_model_config = {
120
  "provider": os.getenv("MODEL_PROVIDER", "openai"),
121
  "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
122
- "api_key": os.getenv("OPENAI_API_KEY"),
123
  "temperature": float(os.getenv("TEMPERATURE", 1.0)),
124
  "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
125
  }
@@ -188,8 +204,8 @@ The refined description aims to equip both users and analysis agents with essent
188
  desc: Description of the dataset
189
  """
190
  try:
191
- data_dict = make_data(df, desc)
192
- retrievers = self.initialize_retrievers(self.styling_instructions, [str(data_dict)])
193
  ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
194
 
195
  # Get default model config for new sessions
@@ -207,7 +223,7 @@ The refined description aims to equip both users and analysis agents with essent
207
  "current_df": df,
208
  "retrievers": retrievers,
209
  "ai_system": ai_system,
210
- "make_data": data_dict,
211
  "description": desc,
212
  "name": name,
213
  "model_config": default_model_config, # Initialize with default
@@ -380,4 +396,4 @@ async def get_session_id(request, session_manager):
380
  except Exception as e:
381
  logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
382
 
383
- return session_id
 
12
  from src.agents.agents import auto_analyst, auto_analyst_ind
13
  from src.agents.retrievers.retrievers import make_data
14
  from src.managers.chat_manager import ChatManager
15
+ from dotenv import load_dotenv
16
+
17
+ load_dotenv()
18
 
19
  # Initialize logger
20
  logger = Logger("session_manager", see_time=False, console_log=False)
 
41
  self._make_data = None
42
  self._default_name = "Housing Dataset" # Default dataset name
43
 
44
+ self._dataset_description = """This dataset contains residential property information with details about pricing, physical characteristics, and amenities. The data can be used for real estate market analysis, property valuation, and understanding the relationship between house features and prices.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ Key Features:
47
+ - Property prices range from 1.75M to 13.3M (currency units)
48
+ - Living areas from 1,650 to 16,200 (square units)
49
+ - Properties vary from 1-6 bedrooms and 1-4 bathrooms
50
+ - Various amenities tracked including parking, air conditioning, and hot water heating
51
 
52
  TECHNICAL CONSIDERATIONS FOR ANALYSIS:
 
 
 
 
 
53
 
54
+ Numeric Columns:
55
+ - price (int): Large values suggesting currency units; range 1.75M-13.3M
56
+ - area (int): Square units measurement; range 1,650-16,200
57
+ - bedrooms (int): Discrete values 1-6
58
+ - bathrooms (int): Discrete values 1-4
59
+ - stories (int): Discrete values 1-4
60
+ - parking (int): Discrete values 0-3
61
+
62
+ Binary Categorical Columns (stored as str):
63
+ - mainroad (str): 'yes'/'no' - Consider boolean conversion
64
+ - guestroom (str): 'yes'/'no' - Consider boolean conversion
65
+ - basement (str): 'yes'/'no' - Consider boolean conversion
66
+ - hotwaterheating (str): 'yes'/'no' - Consider boolean conversion
67
+ - airconditioning (str): 'yes'/'no' - Consider boolean conversion
68
+ - prefarea (str): 'yes'/'no' - Consider boolean conversion
69
+
70
+ Other Categorical:
71
+ - furnishingstatus (str): Categories include 'furnished', 'semi-furnished' - Consider one-hot encoding
72
+
73
+ Data Handling Recommendations:
74
+ 1. Binary variables should be converted to boolean or numeric (0/1) for analysis
75
+ 2. Consider normalizing price and area values for certain analyses
76
+ 3. Furnishing status will need categorical encoding for numerical analysis
77
+ 4. No null values detected in the dataset
78
+ 5. All numeric columns are properly typed as numbers (no string conversion needed)
79
+ 6. Consider treating bedrooms, bathrooms, stories, and parking as categorical despite numeric storage
80
+
81
+ This dataset appears clean with consistent formatting and no missing values, making it suitable for immediate analysis with appropriate categorical encoding.
82
+ """
83
  self.styling_instructions = styling_instructions
84
  self.available_agents = available_agents
85
+ self.chat_manager = ChatManager(db_url=os.getenv("DATABASE_URL"))
86
 
87
  self.initialize_default_dataset()
88
 
 
90
  """Initialize the default dataset and store it"""
91
  try:
92
  self._default_df = pd.read_csv("Housing.csv")
93
+ self._make_data = make_data(self._default_df, self._dataset_description)
94
+ self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
95
  self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
96
  retrievers=self._default_retrievers)
97
  except Exception as e:
 
135
  default_model_config = {
136
  "provider": os.getenv("MODEL_PROVIDER", "openai"),
137
  "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
138
+ "api_key": os.getenv("OPENAI_API_KEY"),
139
  "temperature": float(os.getenv("TEMPERATURE", 1.0)),
140
  "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
141
  }
 
204
  desc: Description of the dataset
205
  """
206
  try:
207
+ self._make_data = make_data(df, desc)
208
+ retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
209
  ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
210
 
211
  # Get default model config for new sessions
 
223
  "current_df": df,
224
  "retrievers": retrievers,
225
  "ai_system": ai_system,
226
+ "make_data": self._make_data,
227
  "description": desc,
228
  "name": name,
229
  "model_config": default_model_config, # Initialize with default
 
396
  except Exception as e:
397
  logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
398
 
399
+ return session_id