Arafath10 committed on
Commit
6f3e751
1 Parent(s): abb2214

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +15 -19
main.py CHANGED
@@ -38,29 +38,22 @@ def train_the_model(data,page):
38
  'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
39
  'status_name']
40
 
41
-
42
-
43
-
44
-
45
-
46
  new_data_filled = new_data[selected_columns].fillna('Missing')
47
-
48
- # Encoding categorical data
49
  for col, encoder in encoders.items():
50
  if col in new_data_filled.columns:
51
  unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
52
  if unseen_categories:
53
- encoder.classes_ = np.append(encoder.classes_, unseen_categories)
54
- new_data_filled[col] = encoder.transform(new_data_filled[col])
55
-
56
- # Splitting data into features and target
57
- X_new = new_data_filled.drop('status_name', axis=1)
58
- y_new = new_data_filled['status_name']
59
-
60
- # Splitting data into training and testing sets
61
  X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
62
-
63
- # Setting up parameter grid for hyperparameter tuning
 
64
  param_grid = {
65
  'max_depth': [3, 4, 5],
66
  'learning_rate': [0.01, 0.1, 0.4],
@@ -69,10 +62,13 @@ def train_the_model(data,page):
69
  'colsample_bytree': [0.3, 0.7]
70
  }
71
 
72
- # Initializing GridSearchCV
 
 
 
73
  grid_search = GridSearchCV(xgb_model, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
74
 
75
- # Fitting GridSearchCV
76
  grid_search.fit(X_train, y_train)
77
 
78
  dump(grid_search, 'transexpress_xgb_model.joblib')
 
38
  'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
39
  'status_name']
40
 
 
 
 
 
 
41
  new_data_filled = new_data[selected_columns].fillna('Missing')
 
 
42
  for col, encoder in encoders.items():
43
  if col in new_data_filled.columns:
44
  unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
45
  if unseen_categories:
46
+ for category in unseen_categories:
47
+ encoder.classes_ = np.append(encoder.classes_, category)
48
+ new_data_filled[col] = encoder.transform(new_data_filled[col])
49
+ else:
50
+ new_data_filled[col] = encoder.transform(new_data_filled[col])
51
+ X_new = new_data_filled.drop('status.name', axis=1)
52
+ y_new = new_data_filled['status.name']
 
53
  X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
54
+
55
+
56
+ # Set up the hyperparameter grid to search
57
  param_grid = {
58
  'max_depth': [3, 4, 5],
59
  'learning_rate': [0.01, 0.1, 0.4],
 
62
  'colsample_bytree': [0.3, 0.7]
63
  }
64
 
65
+ # Initialize the classifier
66
+ #xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
67
+
68
+ # Set up GridSearchCV
69
  grid_search = GridSearchCV(xgb_model, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
70
 
71
+ # Fit the grid search to the data
72
  grid_search.fit(X_train, y_train)
73
 
74
  dump(grid_search, 'transexpress_xgb_model.joblib')