Spaces:
Configuration error
Configuration error
Update preprocess.py
Browse files
- preprocess.py (+10 -1)
preprocess.py
CHANGED
|
@@ -24,10 +24,19 @@ def parse(csv_path):
|
|
| 24 |
X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
|
| 25 |
y = data["is_click"] # Target column
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Normalize numerical features
|
| 28 |
scaler = MinMaxScaler()
|
| 29 |
numerical_features = ['campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
|
| 30 |
-
|
|
|
|
| 31 |
data.to_csv('data/03 normalize.csv', index=False)
|
| 32 |
|
| 33 |
|
|
|
|
| 24 |
X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
|
| 25 |
y = data["is_click"] # Target column
|
| 26 |
|
| 27 |
+
# Extract datetime features
|
| 28 |
+
X['DateTime'] = pd.to_datetime(X['DateTime'])
|
| 29 |
+
X['DayOfWeek'] = X['DateTime'].dt.dayofweek
|
| 30 |
+
X['Month'] = X['DateTime'].dt.month
|
| 31 |
+
X['Hour'] = X['DateTime'].dt.hour
|
| 32 |
+
X = X.drop('DateTime', axis=1)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
# Normalize numerical features
|
| 36 |
scaler = MinMaxScaler()
|
| 37 |
numerical_features = ['campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
|
| 38 |
+
X[numerical_features] = scaler.fit_transform(X[numerical_features])
|
| 39 |
+
data = pd.concat([X, y.to_frame(name="is_click")], axis=1)
|
| 40 |
data.to_csv('data/03 normalize.csv', index=False)
|
| 41 |
|
| 42 |
|