from typing import Optional

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
|
|
| |
def create_features(df: pd.DataFrame, target_lag: int = 1) -> pd.DataFrame:
    """
    Creates basic features for financial time series.

    Columns added to a copy of ``df`` (the input is not modified):
    - ``returns``: one-period percentage change of ``Close``.
    - ``target``: 1 if ``Close`` ``target_lag`` rows ahead is strictly
      greater than the current ``Close``, else 0.
    - ``ma5`` / ``ma20``: 5- and 20-row rolling means of ``Close``.

    Rows containing NaN in any column (e.g. the rolling-window warm-up) are
    dropped, as are rows whose future ``Close`` is unknown (the trailing
    ``target_lag`` rows), because no valid label exists for them.

    Args:
        df: Frame with at least a ``Close`` column, ordered oldest first.
        target_lag: Number of rows to look ahead for the target (>= 1).

    Returns:
        A new DataFrame with the added columns and incomplete rows removed.

    Raises:
        ValueError: If ``target_lag`` is smaller than 1.
        KeyError: If ``df`` has no ``Close`` column.
    """
    if target_lag < 1:
        raise ValueError(f"target_lag must be a positive integer, got {target_lag}")

    df_copy = df.copy()
    close = df_copy['Close']
    future_close = close.shift(-target_lag)

    df_copy['returns'] = close.pct_change()
    df_copy['target'] = (future_close > close).astype(int)
    df_copy['ma5'] = close.rolling(window=5).mean()
    df_copy['ma20'] = close.rolling(window=20).mean()

    # A comparison against NaN is False, so rows without a known future close
    # would silently be labelled 0 and survive dropna(); remove them first.
    df_copy = df_copy[future_close.notna()]
    return df_copy.dropna()
|
|
def preprocess_data_for_supervised(
    df: pd.DataFrame,
    features_list: Optional[list] = None,
    target_col: str = 'target',
    test_size: float = 0.2,
    random_state: int = 42
):
    """
    Prepares data for supervised learning.
    - Selects features and target.
    - Splits data into training and testing sets, keeping row order
      (``shuffle=False``) so the test set is the most recent slice.
    - Scales features with a scaler fitted on the training split only.

    Args:
        df: Frame containing the feature columns and the target column.
        features_list: Feature column names; a single name is also accepted.
            Defaults to ``['returns', 'ma5', 'ma20']`` when omitted.
        target_col: Name of the label column.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test, scaler)`` where the
        scaled arrays come from ``StandardScaler`` and ``scaler`` is the
        fitted instance.

    Raises:
        KeyError: If a requested feature or the target column is missing.
    """
    # Resolve the default per call instead of sharing one list object
    # between calls through the signature.
    if features_list is None:
        feature_cols = ['returns', 'ma5', 'ma20']
    elif isinstance(features_list, str):
        feature_cols = [features_list]
    else:
        feature_cols = list(features_list)

    X = df[feature_cols]
    y = df[target_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=False
    )

    # Fit on the training split only so test statistics do not leak in.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test, scaler
|
|
def train_supervised_model(X_train, y_train, model_type='logistic_regression', model_params=None):
    """
    Trains a supervised learning model.

    Args:
        X_train: Training feature matrix passed to the model's ``fit``.
        y_train: Training labels passed to the model's ``fit``.
        model_type: Which model to build; only ``'logistic_regression'`` is
            supported.
        model_params: Optional keyword arguments for the model constructor.
            They take precedence over the built-in defaults
            (``random_state=42``, ``max_iter=1000``).

    Returns:
        The fitted model.

    Raises:
        ValueError: If ``model_type`` is not supported.
    """
    if model_type != 'logistic_regression':
        raise ValueError(f"Unsupported model type: {model_type}")

    # Defaults first, caller overrides second: passing random_state or
    # max_iter in model_params must not collide with the built-in values.
    params = {'random_state': 42, 'max_iter': 1000}
    if model_params:
        params.update(model_params)

    model = LogisticRegression(**params)
    model.fit(X_train, y_train)
    return model
|
|
def evaluate_supervised_model(model, X_test, y_test):
    """
    Scores a fitted model on held-out data and prints a summary.

    Predicts on ``X_test``, computes the accuracy and the text
    classification report against ``y_test``, prints both, and returns
    ``(accuracy, report, predictions)``.
    """
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    summary = classification_report(y_test, y_pred)

    # One call, newline-separated: same output as three separate prints.
    print(
        f"Model Accuracy: {acc:.4f}",
        "Classification Report:",
        summary,
        sep="\n",
    )

    return acc, summary, y_pred
|
|
def main():
    """
    Runs an end-to-end demo on synthetic price data.

    Builds a 200-business-day OHLCV frame from seeded random numbers (with a
    sine component added to ``Close``), then creates features, splits and
    scales them, trains the default model, and prints its evaluation.
    """
    n_rows = 200

    # Seeded generator so repeated runs of the demo print the same results.
    rng = np.random.default_rng(42)
    dates = pd.date_range(start='2023-01-01', periods=n_rows, freq='B')
    data = rng.random((n_rows, 5)) * 100 + 100
    raw_data = pd.DataFrame(data, index=dates, columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    raw_data['Close'] = raw_data['Close'] + np.sin(np.linspace(0, 10, n_rows)) * 10

    if raw_data.empty:
        print("Raw data is empty. Check data fetching.")
        return

    featured_data = create_features(raw_data)
    if featured_data.empty:
        print("Featured data is empty. Check feature creation.")
        return

    X_train_scaled, X_test_scaled, y_train, y_test, scaler = preprocess_data_for_supervised(
        featured_data,
        features_list=['returns', 'ma5', 'ma20']
    )

    print("Training supervised model...")
    trained_model = train_supervised_model(X_train_scaled, y_train)
    print("Model trained.")

    print("\nEvaluating model...")
    evaluate_supervised_model(trained_model, X_test_scaled, y_test)


if __name__ == '__main__':
    main()