KaiquanMah committed on
Commit
26f170a
·
verified ·
1 Parent(s): 4b6a510

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +9 -0
preprocess.py CHANGED
@@ -2,6 +2,7 @@ import argparse
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.preprocessing import MinMaxScaler
 
5
  import os
6
 
7
 
@@ -35,6 +36,14 @@ def parse(csv_path):
35
  X.loc[:,"hour"] = pd.to_datetime(X['DateTime'], errors='coerce').dt.hour.values
36
  X = X.drop('DateTime', axis=1)
37
 
 
 
 
 
 
 
 
 
38
 
39
  # Normalize numerical features
40
  scaler = MinMaxScaler()
 
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.preprocessing import MinMaxScaler
5
+ from sklearn.preprocessing import LabelEncoder
6
  import os
7
 
8
 
 
36
  X.loc[:,"hour"] = pd.to_datetime(X['DateTime'], errors='coerce').dt.hour.values
37
  X = X.drop('DateTime', axis=1)
38
 
39
+ # Product label to number
40
+ le = LabelEncoder()
41
+ X.loc[:,"product"] = le.fit_transform(X["product"])
42
+ # Gender label to number (values not in the mapping below, e.g. 'F', become NaN)
43
+ X['gender'] = X['gender'].map({'Female': 1,
44
+ 'Male': 0,
45
+ 'M': 0})
46
+
47
 
48
  # Normalize numerical features
49
  scaler = MinMaxScaler()