"""Build windowed training tensors from raw thigh/shin sensor recordings.

Usage: python <this script> <subject_name>

Reads ../../data/unprocessed/<subject>/{front,back}.txt and writes processed
CSVs plus a windowed tensor file under ../../data/processed/<subject>/.
"""
import os
import sys

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler, StandardScaler

np.set_printoptions(suppress=True)

# Feature scaler (zero mean / unit variance) and label scaler (min-max to [0, 1]).
ss, mm = StandardScaler(), MinMaxScaler()
# ---------------------------------------------------------------------------
# Load the raw thigh ("front") and shin ("back") recordings for the subject
# named on the command line, align them row-for-row, and split into
# feature / label tables.
# ---------------------------------------------------------------------------
# Column 0 of each raw file is skipped — presumably a timestamp/sample index;
# TODO confirm against the capture format.
raw_dir = f"../../data/unprocessed/{sys.argv[1]}"
use_cols = [i for i in range(13) if i != 0]
thigh = pd.read_csv(f"{raw_dir}/front.txt", delimiter=',', usecols=use_cols)
shin = pd.read_csv(f"{raw_dir}/back.txt", delimiter=',', usecols=use_cols)
thigh, shin = thigh.dropna(), shin.dropna()

# Align both recordings on their *ends* by trimming the longer one from the
# front. (The original code assumed thigh was always the longer recording;
# trimming whichever is longer is backward-compatible and handles both cases.)
# Resetting BOTH indices matters: dropna can leave gaps, and pd.concat(axis=1)
# below aligns on index — mismatched indices would silently inject NaN rows.
n = min(len(thigh), len(shin))
thigh = thigh.iloc[len(thigh) - n:].reset_index(drop=True)
shin = shin.iloc[len(shin) - n:].reset_index(drop=True)

# Cap the session length.
thigh, shin = thigh[:55000], shin[:55000]

# Suffix every column so the two sensors can be concatenated side by side.
# (A single rename with a callable replaces the fragile original pattern of
# renaming in place while iterating over the columns being renamed.)
thigh = thigh.rename(columns=lambda c: f"{c}_th")
shin = shin.rename(columns=lambda c: f"{c}_sh")

# Features are the "s*" channels, labels the "p*" channels —
# NOTE(review): the p/s semantics aren't visible here; confirm with the
# sensor documentation.
feature_cols_th = [c for c in thigh.columns if c.startswith('s')]
feature_cols_sh = [c for c in shin.columns if c.startswith('s')]
label_cols_th = [c for c in thigh.columns if c.startswith('p')]
label_cols_sh = [c for c in shin.columns if c.startswith('p')]

features = pd.concat([thigh[feature_cols_th], shin[feature_cols_sh]], axis=1)
labels = pd.concat([thigh[label_cols_th], shin[label_cols_sh]], axis=1)

# Standardize features; min-max scale labels into [0, 1].
features_scaled = pd.DataFrame(ss.fit_transform(features), columns=features.columns)
labels_scaled = pd.DataFrame(mm.fit_transform(labels), columns=labels.columns)

# Persist the *unscaled* tables for inspection; the scaled frames feed the
# tensor builder below.
out_dir = f"../../data/processed/{sys.argv[1]}"
os.makedirs(out_dir, exist_ok=True)
features.to_csv(f"{out_dir}/features.csv")
labels.to_csv(f"{out_dir}/labels.csv")
def preprocess_data(features_df, labels_df, lookback_window, predict_window, output_file, samples_per_unit=150):
    """Slice aligned feature/label frames into sliding windows and save tensors.

    For every valid position i, pairs a lookback window of features
    (rows i-lookback .. i-1) with a prediction window of labels
    (rows i .. i+horizon-1), then saves ``{"x": x_data, "y": y_data}``
    via ``torch.save``.

    Args:
        features_df: DataFrame of model inputs, one row per sample.
        labels_df: DataFrame of targets, row-aligned with ``features_df``.
        lookback_window: history length, in units of ``samples_per_unit`` rows.
        predict_window: horizon length, in units of ``samples_per_unit`` rows.
        output_file: path handed to ``torch.save``.
        samples_per_unit: rows per window unit (default 150 — the sampling
            factor that was previously hard-coded).

    Raises:
        ValueError: if the frames are too short to produce any window.
    """
    lookback = lookback_window * samples_per_unit
    horizon = predict_window * samples_per_unit
    total_samples = len(features_df) - lookback - horizon
    if total_samples < 0:
        raise ValueError(
            f"Need at least {lookback + horizon} rows, got {len(features_df)}"
        )
    # Convert once up front: per-window DataFrame.iloc slicing is far slower
    # than slicing a contiguous float32 ndarray.
    feat = features_df.to_numpy(dtype=np.float32)
    lab = labels_df.to_numpy(dtype=np.float32)
    x_data = torch.zeros((total_samples, lookback, feat.shape[1]))
    y_data = torch.zeros((total_samples, horizon, lab.shape[1]))
    for idx in range(total_samples):
        if idx % 1000 == 0:
            print(f"Processing sample {idx}/{total_samples}...")
        i = idx + lookback  # first row of the prediction window
        x_data[idx] = torch.from_numpy(feat[i - lookback:i])
        y_data[idx] = torch.from_numpy(lab[i:i + horizon])
    torch.save({"x": x_data, "y": y_data}, output_file)
    print(f"Preprocessed data saved to {output_file}")
# Build 3-unit (3 x 150 = 450-row) lookback / prediction windows from the
# scaled frames and save the tensors next to the CSVs.
preprocess_data(features_scaled, labels_scaled, 3, 3, f"../../data/processed/{sys.argv[1]}/data.pt")