Spaces:
Running
Running
File size: 1,996 Bytes
c689089 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# train_lightgbm.py
import os
import pickle
import pandas as pd
import numpy as np
import lightgbm as lgb
from lightgbm import LGBMRegressor, early_stopping, record_evaluation
from lightgbm_model.scripts.config_lightgbm import (
DATA_PATH,
FEATURES,
TARGET,
LIGHTGBM_PARAMS,
EARLY_STOPPING_ROUNDS,
RESULTS_DIR,
MODEL_DIR
)
# === Load Data ===
df = pd.read_csv(DATA_PATH)
# Drop date (used later for plots only)
df = df.drop(columns=["date"], errors="ignore")
# === Time-based Split (70% train, 10% valid, 20% test) ===
train_size = int(len(df) * 0.7)
valid_size = int(len(df) * 0.1)
df_train = df.iloc[:train_size]
df_valid = df.iloc[train_size:train_size + valid_size]
df_test = df.iloc[train_size + valid_size:]
X_train, y_train = df_train[FEATURES], df_train[TARGET]
X_valid, y_valid = df_valid[FEATURES], df_valid[TARGET]
X_test, y_test = df_test[FEATURES], df_test[TARGET]
# === Init LightGBM model ===
eval_result = {}
model = LGBMRegressor(
**LIGHTGBM_PARAMS,
verbosity=-1
)
model.fit(
X_train,
y_train,
eval_set=[(X_train, y_train), (X_valid, y_valid)],
eval_metric="rmse",
callbacks=[
early_stopping(EARLY_STOPPING_ROUNDS),
record_evaluation(eval_result)
]
)
# === Save model ===
os.makedirs(MODEL_DIR, exist_ok=True)
model_path = os.path.join(MODEL_DIR, "lightgbm_final_model.pkl")
with open(model_path, "wb") as f:
pickle.dump(model, f)
# === Save evaluation results ===
os.makedirs(RESULTS_DIR, exist_ok=True)
eval_result_path = os.path.join(RESULTS_DIR, "lightgbm_eval_result.pkl")
with open(eval_result_path, "wb") as f:
pickle.dump(eval_result, f)
print(f"Model saved to: {model_path}")
print(f"Eval results saved to: {eval_result_path}")
# === Save data for evaluation ===
X_train.to_csv(os.path.join(RESULTS_DIR, "X_train.csv"), index=False)
X_test.to_csv(os.path.join(RESULTS_DIR, "X_test.csv"), index=False)
y_test.to_csv(os.path.join(RESULTS_DIR, "y_test.csv"), index=False)
|