Arafath10 committed on
Commit
06e74c7
1 Parent(s): 8e25bbd

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +38 -15
main.py CHANGED
@@ -28,36 +28,59 @@ app.add_middleware(
28
 
29
  def train_the_model(data):
30
  try:
 
31
  new_data = data
32
  encoders = load('transexpress_encoders.joblib')
33
  xgb_model = load('transexpress_xgb_model.joblib')
 
 
34
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
35
- 'weight','cod','pickup_address','client_number','destination_city',
36
  'status_name']
37
-
38
  new_data_filled = new_data[selected_columns].fillna('Missing')
 
 
39
  for col, encoder in encoders.items():
40
  if col in new_data_filled.columns:
41
  unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
42
  if unseen_categories:
43
- for category in unseen_categories:
44
- encoder.classes_ = np.append(encoder.classes_, category)
45
- new_data_filled[col] = encoder.transform(new_data_filled[col])
46
- else:
47
- new_data_filled[col] = encoder.transform(new_data_filled[col])
48
  X_new = new_data_filled.drop('status_name', axis=1)
49
  y_new = new_data_filled['status_name']
50
-
51
- X_train, X_test, y_train, y_test = train_test_split(X_new,y_new, test_size=0.2, random_state=42)
52
 
53
- xgb_model.fit(X_new, y_new)
54
- dump(xgb_model,'transexpress_xgb_model.joblib')
55
 
56
-
57
- y_pred = xgb_model.predict(X_test)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  accuracy = accuracy_score(y_test, y_pred)
59
  classification_rep = classification_report(y_test, y_pred)
60
- return accuracy,classification_rep,"Model finetuned with new data."
 
 
 
61
 
62
 
63
  except:
@@ -94,7 +117,7 @@ def train_the_model(data):
94
  xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
95
 
96
  # Setup GridSearchCV
97
- grid_search = GridSearchCV(xgb, param_grid, cv=40, n_jobs=-1, scoring='accuracy')
98
 
99
  # Fit the grid search to the data
100
  grid_search.fit(X_train, y_train)
 
28
 
29
  def train_the_model(data):
30
  try:
31
+
32
  new_data = data
33
  encoders = load('transexpress_encoders.joblib')
34
  xgb_model = load('transexpress_xgb_model.joblib')
35
+
36
+ # Selecting and filling missing data
37
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
38
+ 'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
39
  'status_name']
 
40
  new_data_filled = new_data[selected_columns].fillna('Missing')
41
+
42
+ # Encoding categorical data
43
  for col, encoder in encoders.items():
44
  if col in new_data_filled.columns:
45
  unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
46
  if unseen_categories:
47
+ encoder.classes_ = np.append(encoder.classes_, unseen_categories)
48
+ new_data_filled[col] = encoder.transform(new_data_filled[col])
49
+
50
+ # Splitting data into features and target
 
51
  X_new = new_data_filled.drop('status_name', axis=1)
52
  y_new = new_data_filled['status_name']
 
 
53
 
54
+ # Splitting data into training and testing sets
55
+ X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
56
 
57
+ # Setting up parameter grid for hyperparameter tuning
58
+ param_grid = {
59
+ 'max_depth': [3, 4, 5],
60
+ 'learning_rate': [0.01, 0.1, 0.4],
61
+ 'n_estimators': [100, 200, 300],
62
+ 'subsample': [0.8, 0.9, 1],
63
+ 'colsample_bytree': [0.3, 0.7]
64
+ }
65
+
66
+ # Initializing GridSearchCV
67
+ grid_search = GridSearchCV(estimator=xgb_model, param_grid, cv=50, n_jobs=-1, scoring='accuracy')
68
+
69
+ # Fitting GridSearchCV
70
+ grid_search.fit(X_train, y_train)
71
+
72
+ # Updating the model with the best estimator
73
+ best_model = grid_search.best_estimator_
74
+ dump(best_model, 'transexpress_xgb_model.joblib')
75
+
76
+ # Making predictions and evaluating the model
77
+ y_pred = best_model.predict(X_test)
78
  accuracy = accuracy_score(y_test, y_pred)
79
  classification_rep = classification_report(y_test, y_pred)
80
+
81
+ # Returning the results
82
+ return accuracy, classification_rep, "Model finetuned with new data."
83
+
84
 
85
 
86
  except:
 
117
  xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
118
 
119
  # Setup GridSearchCV
120
+ grid_search = GridSearchCV(xgb, param_grid, cv=50, n_jobs=-1, scoring='accuracy')
121
 
122
  # Fit the grid search to the data
123
  grid_search.fit(X_train, y_train)