davidkariuki committed on
Commit
0f1987a
1 Parent(s): 047e260

delete train.py

Files changed (1)
  1. train.py +0 -69
train.py DELETED
@@ -1,69 +0,0 @@
- import pandas as pd
- from sklearn.preprocessing import LabelEncoder
- from sklearn.model_selection import train_test_split, GridSearchCV
- from sklearn.ensemble import GradientBoostingRegressor
- from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, median_absolute_error
- from joblib import dump
-
- # Load the dataset
- df = pd.read_csv('cleaned_housesTRAIN.csv')
-
- # Apply label encoding to 'Area' and 'Suburb'
- le = LabelEncoder()
- df['Area'] = le.fit_transform(df['Area'])
- df['Suburb'] = le.fit_transform(df['Suburb'])
-
- # Shuffle the dataframe
- df_shuffled = df.sample(frac=1)
-
- # Split the shuffled data into features (X) and target (y)
- X_shuffled = df_shuffled.drop('Rent', axis=1)
- y_shuffled = df_shuffled['Rent']
-
- # Split the shuffled data into training and test sets (90/10 split)
- X_train_shuffled, X_test_shuffled, y_train_shuffled, y_test_shuffled = train_test_split(X_shuffled, y_shuffled, test_size=0.1, random_state=42)
-
- # Create a Gradient Boosting regressor
- gb_shuffled = GradientBoostingRegressor(random_state=42)
-
- # Train the model on the shuffled data
- gb_shuffled.fit(X_train_shuffled, y_train_shuffled)
-
- # Define the hyperparameter grid for Gradient Boosting
- param_grid_gb_shuffled = {
-     'n_estimators': [850],
-     'learning_rate': [0.195],
-     'max_depth': [7]
- }
-
- # Create a GridSearchCV object
- grid_search_gb_shuffled = GridSearchCV(estimator=gb_shuffled, param_grid=param_grid_gb_shuffled, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1)
-
- # Perform grid search on the training data
- grid_search_gb_shuffled.fit(X_train_shuffled, y_train_shuffled)
-
- # Get the best parameters for Gradient Boosting
- best_params_gb_shuffled = grid_search_gb_shuffled.best_params_
-
- # Print the best hyperparameters
- print(f"Best hyperparameters: {best_params_gb_shuffled}")
-
- # Create a new gradient boosting regressor with the best parameters
- gb_best_shuffled = GradientBoostingRegressor(**best_params_gb_shuffled, random_state=42)
-
- # Train the model
- gb_best_shuffled.fit(X_train_shuffled, y_train_shuffled)
-
- # Make predictions on the test set
- y_pred_gb_best_shuffled = gb_best_shuffled.predict(X_test_shuffled)
-
- # Calculate MAE, MSE, R2, and MedAE
- mae_gb_best_shuffled = mean_absolute_error(y_test_shuffled, y_pred_gb_best_shuffled)
- mse_gb_best_shuffled = mean_squared_error(y_test_shuffled, y_pred_gb_best_shuffled)
- r2_gb_best_shuffled = r2_score(y_test_shuffled, y_pred_gb_best_shuffled)
- medae_gb_best_shuffled = median_absolute_error(y_test_shuffled, y_pred_gb_best_shuffled)
-
- print(f"MAE: {mae_gb_best_shuffled}, MSE: {mse_gb_best_shuffled}, R2: {r2_gb_best_shuffled}, MedAE: {medae_gb_best_shuffled}")
-
- # Save the model
- dump(gb_best_shuffled, 'bestmodelyet.joblib')