transexpress_ml_api

Sleeping

App Files Files Community

Arafath10 committed on May 1

Commit

11a461e

•

1 Parent(s): d7d7137

Update main.py

Browse files

Files changed (1) hide show

main.py +19 -5

main.py CHANGED Viewed

@@ -31,6 +31,10 @@ def train_the_model():
         data = pd.read_csv("trainer_data.csv")
         print(data["customer_name"].count())
         # Select columns
         selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
@@ -38,7 +42,8 @@ def train_the_model():
                             'status_name']
         # Handling missing values
-        data_filled = data[selected_columns].fillna('Missing')
         # Encoding categorical variables
         encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
@@ -52,7 +57,7 @@ def train_the_model():
         # Parameters to use for the model
         # Parameters to use for the model
-        params = {
             'colsample_bytree': 0.3,
             'learning_rate': 0.6,
             'max_depth': 6,
@@ -60,6 +65,15 @@ def train_the_model():
             'subsample': 0.9,
             'use_label_encoder': False,
             'eval_metric': 'logloss'
         }
         # Initialize the classifier with the specified parameters
@@ -157,7 +171,7 @@ async def your_continuous_function(page: str,paginate: str):
     print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
     # Save the balanced dataset if needed
-    oversampled_data.to_csv('trainer_data.csv', index=False)
@@ -215,8 +229,8 @@ def predict(
         'customer_name': customer_name,
         'customer_address': customer_address,
         'customer_phone_no': customer_phone,
-        'weight': weight,
-        'cod': cod,
         'pickup_address':pickup_address,
         'client_number':client_number,
         'destination_city':destination_city

         data = pd.read_csv("trainer_data.csv")
         print(data["customer_name"].count())
+        data = pd.read_csv("trainer_data_balanced.csv")
+        print(data["customer_name"].count())
         # Select columns
         selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
                             'status_name']
         # Handling missing values
+        #data_filled = data[selected_columns].fillna('Missing')
+        data_filled = data[selected_columns].dropna()
         # Encoding categorical variables
         encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
         # Parameters to use for the model
         # Parameters to use for the model
+        """params = {
             'colsample_bytree': 0.3,
             'learning_rate': 0.6,
             'max_depth': 6,
             'subsample': 0.9,
             'use_label_encoder': False,
             'eval_metric': 'logloss'
+        }"""
+        params = {
+            'colsample_bytree': 0.9,
+            'learning_rate': 0.1,
+            'max_depth': 30,
+            'n_estimators': 500,
+            'subsample': 0.9,
+            'use_label_encoder': False,
+            'eval_metric': 'logloss'
         }
         # Initialize the classifier with the specified parameters
     print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
     # Save the balanced dataset if needed
+    oversampled_data.to_csv('trainer_data_balanced.csv', index=False)
         'customer_name': customer_name,
         'customer_address': customer_address,
         'customer_phone_no': customer_phone,
+        'weight': float(weight),
+        'cod': int(cod),
         'pickup_address':pickup_address,
         'client_number':client_number,
         'destination_city':destination_city