Spaces:
Configuration error
Configuration error
Update preprocess.py
Browse files
- preprocess.py +9 -0
preprocess.py
CHANGED
|
@@ -2,6 +2,7 @@ import argparse
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.model_selection import train_test_split
|
| 4 |
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
| 5 |
import os
|
| 6 |
|
| 7 |
|
|
@@ -35,6 +36,14 @@ def parse(csv_path):
|
|
| 35 |
X.loc[:,"hour"] = pd.to_datetime(X['DateTime'], errors='coerce').dt.hour.values
|
| 36 |
X = X.drop('DateTime', axis=1)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Normalize numerical features
|
| 40 |
scaler = MinMaxScaler()
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from sklearn.model_selection import train_test_split
|
| 4 |
from sklearn.preprocessing import MinMaxScaler
|
| 5 |
+
from sklearn.preprocessing import LabelEncoder
|
| 6 |
import os
|
| 7 |
|
| 8 |
|
|
|
|
| 36 |
X.loc[:,"hour"] = pd.to_datetime(X['DateTime'], errors='coerce').dt.hour.values
|
| 37 |
X = X.drop('DateTime', axis=1)
|
| 38 |
|
| 39 |
+
# Product label to number
|
| 40 |
+
le = LabelEncoder()
|
| 41 |
+
X.loc[:,"product"] = le.fit_transform(X["product"])
|
| 42 |
+
# Gender label to number
|
| 43 |
+
X['gender'] = X['gender'].map({'Female': 1,
|
| 44 |
+
'Male': 0,
|
| 45 |
+
'M': 0})
|
| 46 |
+
|
| 47 |
|
| 48 |
# Normalize numerical features
|
| 49 |
scaler = MinMaxScaler()
|