KaiquanMah committed on
Commit
40cf979
·
verified ·
1 Parent(s): abfaf79

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +10 -1
preprocess.py CHANGED
@@ -24,10 +24,19 @@ def parse(csv_path):
24
  X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
25
  y = data["is_click"] # Target column
26
 
 
 
 
 
 
 
 
 
27
  # Normalize numerical features
28
  scaler = MinMaxScaler()
29
  numerical_features = ['campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
30
- data[numerical_features] = scaler.fit_transform(data[numerical_features])
 
31
  data.to_csv('data/03 normalize.csv', index=False)
32
 
33
 
 
24
  X = data[["DateTime","product","campaign_id","webpage_id","product_category_1","product_category_2","user_group_id","gender","age_level","user_depth","city_development_index","var_1"]] # Feature columns
25
  y = data["is_click"] # Target column
26
 
27
+ # Extract datetime features
28
+ X['DateTime'] = pd.to_datetime(X['DateTime'])
29
+ X['DayOfWeek'] = X['DateTime'].dt.dayofweek
30
+ X['Month'] = X['DateTime'].dt.month
31
+ X['Hour'] = X['DateTime'].dt.hour
32
+ X = X.drop('DateTime', axis=1)
33
+
34
+
35
  # Normalize numerical features
36
  scaler = MinMaxScaler()
37
  numerical_features = ['campaign_id','webpage_id','user_depth',"product_category_1","product_category_2","user_group_id", 'age_level',"user_depth", 'city_development_index', 'var_1']
38
+ X[numerical_features] = scaler.fit_transform(X[numerical_features])
39
+ data = pd.concat([X, y.to_frame(name="is_click")], axis=1)
40
  data.to_csv('data/03 normalize.csv', index=False)
41
 
42