davidkariuki committed on
Commit
7ef0172
1 Parent(s): 90ae9e0

Upload train.py

Browse files

The script I used to train my model on my dataset.

Files changed (1) hide show
  1. train.py +51 -0
train.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train a gradient-boosting regressor that predicts 'Rent' and save it.

Reads 'cleaned_housesTRAIN.csv', label-encodes the 'Area' and 'Suburb'
columns, fits a GradientBoostingRegressor on a 90/10 train/test split,
prints hold-out metrics, and writes the fitted encoders and model to
joblib files in the current working directory.
"""
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, median_absolute_error
from joblib import dump

# One seed shared by every randomized step so a rerun on the same CSV
# reproduces the same split, metrics, and model.
RANDOM_STATE = 42

# Load the dataset
df = pd.read_csv('cleaned_housesTRAIN.csv')

# Apply label encoding to 'Area' and 'Suburb'.
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels (OrdinalEncoder is the feature-side counterpart). It is kept here
# because the saved encoder files are presumably loaded by inference code
# that expects LabelEncoder objects -- confirm before changing.
le_area = LabelEncoder()
df['Area'] = le_area.fit_transform(df['Area'])

le_suburb = LabelEncoder()
df['Suburb'] = le_suburb.fit_transform(df['Suburb'])

# Save the label encoders so the same category -> integer mapping can be
# applied outside this script.
dump(le_area, 'le_area.joblib')
dump(le_suburb, 'le_suburb.joblib')

# Shuffle the dataframe. The shuffle is seeded: an unseeded shuffle changes
# the row order on every run, which makes the seeded split below select
# different rows each time and defeats its random_state.
df = df.sample(frac=1, random_state=RANDOM_STATE)

# Split the data into features (X) and target (y)
X = df.drop('Rent', axis=1)
y = df['Rent']

# Split the data into training and test sets (90/10 split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_STATE
)

# Create a Gradient Boosting regressor with specified hyperparameters
gb = GradientBoostingRegressor(
    n_estimators=850,
    learning_rate=0.195,
    max_depth=7,
    random_state=RANDOM_STATE,
)

# Train the model
gb.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = gb.predict(X_test)

# Calculate MAE, MSE, R2, and median absolute error on the test set
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
medae = median_absolute_error(y_test, y_pred)

print(f"MAE: {mae}, MSE: {mse}, R2: {r2}, MedAE: {medae}")

# Save the model
dump(gb, 'bestmodelyet.joblib')