Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -38,29 +38,22 @@ def train_the_model(data,page):
|
|
38 |
'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
|
39 |
'status_name']
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
new_data_filled = new_data[selected_columns].fillna('Missing')
|
47 |
-
|
48 |
-
# Encoding categorical data
|
49 |
for col, encoder in encoders.items():
|
50 |
if col in new_data_filled.columns:
|
51 |
unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
|
52 |
if unseen_categories:
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
# Splitting data into training and testing sets
|
61 |
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
|
62 |
-
|
63 |
-
|
|
|
64 |
param_grid = {
|
65 |
'max_depth': [3, 4, 5],
|
66 |
'learning_rate': [0.01, 0.1, 0.4],
|
@@ -69,10 +62,13 @@ def train_the_model(data,page):
|
|
69 |
'colsample_bytree': [0.3, 0.7]
|
70 |
}
|
71 |
|
72 |
-
#
|
|
|
|
|
|
|
73 |
grid_search = GridSearchCV(xgb_model, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
|
74 |
|
75 |
-
#
|
76 |
grid_search.fit(X_train, y_train)
|
77 |
|
78 |
dump(grid_search, 'transexpress_xgb_model.joblib')
|
|
|
38 |
'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
|
39 |
'status_name']
|
40 |
|
|
|
|
|
|
|
|
|
|
|
41 |
new_data_filled = new_data[selected_columns].fillna('Missing')
|
|
|
|
|
42 |
for col, encoder in encoders.items():
|
43 |
if col in new_data_filled.columns:
|
44 |
unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
|
45 |
if unseen_categories:
|
46 |
+
for category in unseen_categories:
|
47 |
+
encoder.classes_ = np.append(encoder.classes_, category)
|
48 |
+
new_data_filled[col] = encoder.transform(new_data_filled[col])
|
49 |
+
else:
|
50 |
+
new_data_filled[col] = encoder.transform(new_data_filled[col])
|
51 |
+
X_new = new_data_filled.drop('status.name', axis=1)
|
52 |
+
y_new = new_data_filled['status.name']
|
|
|
53 |
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
|
54 |
+
|
55 |
+
|
56 |
+
# Setup the hyperparameter grid to search
|
57 |
param_grid = {
|
58 |
'max_depth': [3, 4, 5],
|
59 |
'learning_rate': [0.01, 0.1, 0.4],
|
|
|
62 |
'colsample_bytree': [0.3, 0.7]
|
63 |
}
|
64 |
|
65 |
+
# Initialize the classifier
|
66 |
+
#xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
|
67 |
+
|
68 |
+
# Setup GridSearchCV
|
69 |
grid_search = GridSearchCV(xgb_model, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
|
70 |
|
71 |
+
# Fit the grid search to the data
|
72 |
grid_search.fit(X_train, y_train)
|
73 |
|
74 |
dump(grid_search, 'transexpress_xgb_model.joblib')
|