Arafath10 committed on
Commit
11a461e
1 Parent(s): d7d7137

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +19 -5
main.py CHANGED
@@ -31,6 +31,10 @@ def train_the_model():
31
 
32
  data = pd.read_csv("trainer_data.csv")
33
  print(data["customer_name"].count())
 
 
 
 
34
 
35
  # Select columns
36
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
@@ -38,7 +42,8 @@ def train_the_model():
38
  'status_name']
39
 
40
  # Handling missing values
41
- data_filled = data[selected_columns].fillna('Missing')
 
42
 
43
  # Encoding categorical variables
44
  encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
@@ -52,7 +57,7 @@ def train_the_model():
52
 
53
  # Parameters to use for the model
54
  # Parameters to use for the model
55
- params = {
56
  'colsample_bytree': 0.3,
57
  'learning_rate': 0.6,
58
  'max_depth': 6,
@@ -60,6 +65,15 @@ def train_the_model():
60
  'subsample': 0.9,
61
  'use_label_encoder': False,
62
  'eval_metric': 'logloss'
 
 
 
 
 
 
 
 
 
63
  }
64
 
65
  # Initialize the classifier with the specified parameters
@@ -157,7 +171,7 @@ async def your_continuous_function(page: str,paginate: str):
157
  print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
158
 
159
  # Save the balanced dataset if needed
160
- oversampled_data.to_csv('trainer_data.csv', index=False)
161
 
162
 
163
 
@@ -215,8 +229,8 @@ def predict(
215
  'customer_name': customer_name,
216
  'customer_address': customer_address,
217
  'customer_phone_no': customer_phone,
218
- 'weight': weight,
219
- 'cod': cod,
220
  'pickup_address':pickup_address,
221
  'client_number':client_number,
222
  'destination_city':destination_city
 
31
 
32
  data = pd.read_csv("trainer_data.csv")
33
  print(data["customer_name"].count())
34
+
35
+ data = pd.read_csv("trainer_data_balanced.csv")
36
+ print(data["customer_name"].count())
37
+
38
 
39
  # Select columns
40
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
 
42
  'status_name']
43
 
44
  # Handling missing values
45
+ #data_filled = data[selected_columns].fillna('Missing')
46
+ data_filled = data[selected_columns].dropna()
47
 
48
  # Encoding categorical variables
49
  encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
 
57
 
58
  # Parameters to use for the model
59
  # Parameters to use for the model
60
+ """params = {
61
  'colsample_bytree': 0.3,
62
  'learning_rate': 0.6,
63
  'max_depth': 6,
 
65
  'subsample': 0.9,
66
  'use_label_encoder': False,
67
  'eval_metric': 'logloss'
68
+ }"""
69
+ params = {
70
+ 'colsample_bytree': 0.9,
71
+ 'learning_rate': 0.1,
72
+ 'max_depth': 30,
73
+ 'n_estimators': 500,
74
+ 'subsample': 0.9,
75
+ 'use_label_encoder': False,
76
+ 'eval_metric': 'logloss'
77
  }
78
 
79
  # Initialize the classifier with the specified parameters
 
171
  print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
172
 
173
  # Save the balanced dataset if needed
174
+ oversampled_data.to_csv('trainer_data_balanced.csv', index=False)
175
 
176
 
177
 
 
229
  'customer_name': customer_name,
230
  'customer_address': customer_address,
231
  'customer_phone_no': customer_phone,
232
+ 'weight': float(weight),
233
+ 'cod': int(cod),
234
  'pickup_address':pickup_address,
235
  'client_number':client_number,
236
  'destination_city':destination_city