Spaces:

FireBird-Tech
/

auto-analyst-backend

Running on CPU Upgrade

App Files Files Community

FireBird-Tech committed 19 days ago

Commit

e9a833a

verified ·

1 Parent(s): 7b969cb

Update src/managers/session_manager.py

Browse files

Files changed (1) hide show

src/managers/session_manager.py +46 -30

src/managers/session_manager.py CHANGED Viewed

@@ -12,6 +12,9 @@ from src.managers.user_manager import create_user, get_current_user
 from src.agents.agents import auto_analyst, auto_analyst_ind
 from src.agents.retrievers.retrievers import make_data
 from src.managers.chat_manager import ChatManager
 # Initialize logger
 logger = Logger("session_manager", see_time=False, console_log=False)
@@ -38,35 +41,48 @@ class SessionManager:
         self._make_data = None
         self._default_name = "Housing Dataset"  # Default dataset name
-        self._dataset_description = """This real estate dataset comprises 545 entries of residential properties, featuring essential characteristics that facilitate market analysis, price estimation, and buyer preferences. The dataset includes the following columns:
-1. **price**: The sale price of the property (numeric).
-2. **area**: The total area of the property in square feet (numeric).
-3. **bedrooms**: The number of bedrooms in the property (integer).
-4. **bathrooms**: The number of bathrooms in the property (integer).
-5. **stories**: The number of stories in the house (integer).
-6. **mainroad**: Indicates if the property is located on a main road ('yes' or 'no').
-7. **guestroom**: Indicates the presence of a guest room ('yes' or 'no').
-8. **basement**: Indicates if there is a basement ('yes' or 'no').
-9. **hotwaterheating**: Indicates if the property has hot water heating ('yes' or 'no').
-10. **airconditioning**: Indicates if the property has air conditioning ('yes' or 'no').
-11. **parking**: The number of parking spots available (integer).
-12. **prefarea**: Indicates if the property is located in a preferred area ('yes' or 'no').
-13. **furnishingstatus**: The condition of the property in terms of furnishings (categorical).
-The dataset serves as a valuable resource for analyzing real estate trends, identifying factors influencing property pricing, and understanding buyer requirements across different residential layouts.
 TECHNICAL CONSIDERATIONS FOR ANALYSIS:
-- The **price** and **area** columns are numeric but may need validation to ensure all entries are numeric values and free from formatting issues.
-- The **mainroad**, **guestroom**, **basement**, **hotwaterheating**, **airconditioning**, **prefarea**, and **furnishingstatus** columns contain categorical data; ensure to encode these variables for any machine learning model or analysis.
-- The **bedrooms**, **bathrooms**, **stories**, and **parking** columns are stored as integers; confirm that there are no erroneous string entries that could cause processing errors.
-- Empty or null values should be handled appropriately, with a review of their occurrence to decide on imputation or deletion strategies.
-- It is advisable to check for any outlier values, especially in the **price** and **area** columns, which may skew analysis results.
-The refined description aims to equip both users and analysis agents with essential contextual information about the dataset as well as actionable insights for conducting robust analyses."""
         self.styling_instructions = styling_instructions
         self.available_agents = available_agents
-        self.chat_manager = ChatManager(db_url='sqlite:///chat_database.db')
         self.initialize_default_dataset()
@@ -74,8 +90,8 @@ The refined description aims to equip both users and analysis agents with essent
         """Initialize the default dataset and store it"""
         try:
             self._default_df = pd.read_csv("Housing.csv")
-            data_dict = make_data(self._default_df, self._dataset_description)
-            self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(data_dict)])
             self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
                                                   retrievers=self._default_retrievers)
         except Exception as e:
@@ -119,7 +135,7 @@ The refined description aims to equip both users and analysis agents with essent
             default_model_config = {
                 "provider": os.getenv("MODEL_PROVIDER", "openai"),
                 "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
-            "api_key": os.getenv("OPENAI_API_KEY"),
                 "temperature": float(os.getenv("TEMPERATURE", 1.0)),
                 "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
             }
@@ -188,8 +204,8 @@ The refined description aims to equip both users and analysis agents with essent
             desc: Description of the dataset
         """
         try:
-            data_dict = make_data(df, desc)
-            retrievers = self.initialize_retrievers(self.styling_instructions, [str(data_dict)])
             ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
             # Get default model config for new sessions
@@ -207,7 +223,7 @@ The refined description aims to equip both users and analysis agents with essent
                 "current_df": df,
                 "retrievers": retrievers,
                 "ai_system": ai_system,
-                "make_data": data_dict,
                 "description": desc,
                 "name": name,
                 "model_config": default_model_config,  # Initialize with default
@@ -380,4 +396,4 @@ async def get_session_id(request, session_manager):
     except Exception as e:
         logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
-    return session_id

 from src.agents.agents import auto_analyst, auto_analyst_ind
 from src.agents.retrievers.retrievers import make_data
 from src.managers.chat_manager import ChatManager
+from dotenv import load_dotenv
+load_dotenv()
 # Initialize logger
 logger = Logger("session_manager", see_time=False, console_log=False)
         self._make_data = None
         self._default_name = "Housing Dataset"  # Default dataset name
+        self._dataset_description = """This dataset contains residential property information with details about pricing, physical characteristics, and amenities. The data can be used for real estate market analysis, property valuation, and understanding the relationship between house features and prices.
+Key Features:
+- Property prices range from 1.75M to 13.3M (currency units)
+- Living areas from 1,650 to 16,200 (square units)
+- Properties vary from 1-6 bedrooms and 1-4 bathrooms
+- Various amenities tracked including parking, air conditioning, and hot water heating
 TECHNICAL CONSIDERATIONS FOR ANALYSIS:
+Numeric Columns:
+- price (int): Large values suggesting currency units; range 1.75M-13.3M
+- area (int): Square units measurement; range 1,650-16,200
+- bedrooms (int): Discrete values 1-6
+- bathrooms (int): Discrete values 1-4
+- stories (int): Discrete values 1-4
+- parking (int): Discrete values 0-3
+Binary Categorical Columns (stored as str):
+- mainroad (str): 'yes'/'no' - Consider boolean conversion
+- guestroom (str): 'yes'/'no' - Consider boolean conversion
+- basement (str): 'yes'/'no' - Consider boolean conversion
+- hotwaterheating (str): 'yes'/'no' - Consider boolean conversion
+- airconditioning (str): 'yes'/'no' - Consider boolean conversion
+- prefarea (str): 'yes'/'no' - Consider boolean conversion
+Other Categorical:
+- furnishingstatus (str): Categories include 'furnished', 'semi-furnished' - Consider one-hot encoding
+Data Handling Recommendations:
+1. Binary variables should be converted to boolean or numeric (0/1) for analysis
+2. Consider normalizing price and area values for certain analyses
+3. Furnishing status will need categorical encoding for numerical analysis
+4. No null values detected in the dataset
+5. All numeric columns are properly typed as numbers (no string conversion needed)
+6. Consider treating bedrooms, bathrooms, stories, and parking as categorical despite numeric storage
+This dataset appears clean with consistent formatting and no missing values, making it suitable for immediate analysis with appropriate categorical encoding.
+        """
         self.styling_instructions = styling_instructions
         self.available_agents = available_agents
+        self.chat_manager = ChatManager(db_url=os.getenv("DATABASE_URL"))
         self.initialize_default_dataset()
         """Initialize the default dataset and store it"""
         try:
             self._default_df = pd.read_csv("Housing.csv")
+            self._make_data = make_data(self._default_df, self._dataset_description)
+            self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
             self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()),
                                                   retrievers=self._default_retrievers)
         except Exception as e:
             default_model_config = {
                 "provider": os.getenv("MODEL_PROVIDER", "openai"),
                 "model": os.getenv("MODEL_NAME", "gpt-4o-mini"),
+                "api_key": os.getenv("OPENAI_API_KEY"),
                 "temperature": float(os.getenv("TEMPERATURE", 1.0)),
                 "max_tokens": int(os.getenv("MAX_TOKENS", 6000))
             }
             desc: Description of the dataset
         """
         try:
+            self._make_data = make_data(df, desc)
+            retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)])
             ai_system = auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers)
             # Get default model config for new sessions
                 "current_df": df,
                 "retrievers": retrievers,
                 "ai_system": ai_system,
+                "make_data": self._make_data,
                 "description": desc,
                 "name": name,
                 "model_config": default_model_config,  # Initialize with default
     except Exception as e:
         logger.log_message(f"Error auto-creating user for session {session_id}: {str(e)}", level=logging.ERROR)
+    return session_id