Spaces:

Reality8081
/

SVR_Predict_Stocks

Running

App Files Files Community

Reality8081 committed on 23 days ago

Commit

35beba6

1 Parent(s): 9466500

Update src

Browse files

Files changed (4) hide show

app.py +52 -41
src/data_processing.py +4 -4
src/inference.py +22 -26
src/train.py +81 -80

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import numpy as np
 import yfinance as yf
 from datetime import datetime, timedelta
-from src.inference import predict_next_day
 # --- CÁC HÀM HỖ TRỢ TÍNH TOÁN KỸ THUẬT CHO UI ---
 def calculate_ui_technical_indicators(df):
@@ -42,7 +42,7 @@ def calculate_ui_technical_indicators(df):
 def generate_quant_dashboard(ticker, model_name):
     try:
         # 1. Gọi Inference Engine
-        preds, last_close, last_date, _ = predict_next_day(ticker, model_name)
         # 2. Lấy dữ liệu OHLCV 90 ngày để vẽ Candlestick & tính toán Context
         # Sử dụng yfinance trực tiếp để render UI mượt mà, độc lập với backend load_data nặng nề
@@ -60,7 +60,12 @@ def generate_quant_dashboard(ticker, model_name):
         rsi_val = last_row['RSI_14']
         macd_h  = last_row['MACD_Hist']
-        next_day = pd.to_datetime(last_date) + pd.offsets.BDay(1)
     except Exception as e:
         error_html = f"""<div style='background-color:#3a1010; padding:15px; border-left: 4px solid #ff4d4d; color: #ff8080;'>
@@ -76,6 +81,14 @@ def generate_quant_dashboard(ticker, model_name):
     consensus_html = ""
     target_price = 0
     if model_name == "Cả Hai":
         target_price = (price_lr + price_svr) / 2
         spread_bps = abs(pred_lr - pred_svr) * 10000 # Basis points
@@ -90,28 +103,32 @@ def generate_quant_dashboard(ticker, model_name):
             direction = "UNCERTAIN / CHOPPY ⚠️"
         consensus_html = f"""
-        <div style="background:#1a1a24; border: 1px solid #333; padding: 15px; border-radius: 5px; margin-bottom: 10px;">
-            <p style="color:#8892b0; margin:0; font-size:12px; font-family: monospace;">ALGO CONSENSUS ENGINE</p>
-            <h3 style="color:{color}; margin: 5px 0;">{status}: {direction}</h3>
-            <p style="color:#a8b2d1; margin:0; font-size:13px;">Divergence Spread: <b>{spread_bps:.1f} bps</b></p>
-            <div style="display:flex; justify-content: space-between; margin-top: 10px; font-family: monospace; font-size: 13px;">
-                <span style="color: {'#00ff00' if pred_lr>0 else '#ff3333'}">LR: {pred_lr*100:+.2f}% (${price_lr:.2f})</span>
-                <span style="color: {'#00ff00' if pred_svr>0 else '#ff3333'}">SVR: {pred_svr*100:+.2f}% (${price_svr:.2f})</span>
-            </div>
-        </div>
-        """
     else:
         active_pred = pred_lr if model_name == "Linear Regression" else pred_svr
         target_price = price_lr if model_name == "Linear Regression" else price_svr
         dir_color = "#00ff00" if active_pred > 0 else "#ff3333"
         dir_text = "BULLISH 📈" if active_pred > 0 else "BEARISH 📉"
         consensus_html = f"""
-        <div style="background:#1a1a24; border: 1px solid #333; padding: 15px; border-radius: 5px; margin-bottom: 10px;">
-            <p style="color:#8892b0; margin:0; font-size:12px; font-family: monospace;">SINGLE MODEL ACTIVATED: {model_name.upper()}</p>
-            <h3 style="color:{dir_color}; margin: 5px 0;">DIRECTION: {dir_text}</h3>
-            <p style="color:#a8b2d1; margin:0; font-size:13px;">Expected Return: <b>{active_pred*100:+.2f}%</b></p>
-        </div>
-        """
     # 4. Market Context Panel (Technical Stats)
     rsi_color = "#ff3333" if rsi_val > 70 else ("#00ff00" if rsi_val < 30 else "#a8b2d1")
@@ -134,7 +151,7 @@ def generate_quant_dashboard(ticker, model_name):
     # 5. Vẽ biểu đồ Plotly cấp độ Institutional
     fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                         vertical_spacing=0.03, row_heights=[0.75, 0.25],
-                        subplot_titles=(f"{ticker} - PRICE ACTION & PROJECTIONS", "VOLUME"))
     # Candlestick
     fig.add_trace(go.Candlestick(
@@ -150,33 +167,27 @@ def generate_quant_dashboard(ticker, model_name):
     # Volume subplot
     colors = ['#00ff00' if row['Close'] >= row['Open'] else '#ff3333' for _, row in df_ui.iterrows()]
     fig.add_trace(go.Bar(x=df_ui['Date'], y=df_ui['Volume'], marker_color=colors, name='Volume'), row=2, col=1)
     # --- Thêm điểm dự báo và Confidence Interval (Error Bands) dựa trên ATR ---
     if model_name in ["Linear Regression", "Cả Hai"]:
-        fig.add_trace(go.Scatter(
-            x=[df_ui['Date'].iloc[-1], next_day], y=[last_close, price_lr],
-            mode='lines+markers', name='LR Target',
-            line=dict(color='#ff00ff', dash='dot'), marker=dict(size=10, symbol='diamond')
-        ))
     if model_name in ["SVR", "Cả Hai"]:
-        fig.add_trace(go.Scatter(
-            x=[df_ui['Date'].iloc[-1], next_day], y=[last_close, price_svr],
-            mode='lines+markers', name='SVR Target',
-            line=dict(color='#00ffff', dash='dot'), marker=dict(size=10, symbol='diamond')
-        ))
     # Error Band (±1 ATR cho mức target)
-    fig.add_trace(go.Scatter(
-        x=[next_day, next_day],
-        y=[target_price - atr_val, target_price + atr_val],
-        mode='lines', name='±1 ATR Volatility Band',
-        line=dict(color='rgba(255, 255, 255, 0.4)', width=5)
-    ))
     # Tối ưu giao diện Plotly Dark Mode
     fig.update_layout(
-        height=700,
         template="plotly_dark",
         plot_bgcolor='#0d0d14', paper_bgcolor='#0d0d14',
         margin=dict(l=40, r=40, t=40, b=40),
@@ -200,7 +211,7 @@ body { background-color: #0d0d14; color: #e6e6fa; font-family: 'Inter', sans-ser
 with gr.Blocks(title="Quant Terminal | Stock ML", css=css, theme=gr.themes.Monochrome()) as demo:
     gr.Markdown("""
     <div style="padding: 10px 0; border-bottom: 2px solid #333;">
-        <h1 style="color: #e6e6fa; margin: 0; font-family: monospace;">⚡ QUANTRONIC ML TERMINAL </h1>
         <p style="color: #8892b0; margin: 0; font-family: monospace;">SVR & Ridge Regression Predictive Analytics Engine</p>
     </div>
     """)
@@ -220,7 +231,7 @@ with gr.Blocks(title="Quant Terminal | Stock ML", css=css, theme=gr.themes.Monoc
         # MAIN AREA (Charts)
         with gr.Column(scale=3):
-            plot_chart = gr.Plot()
     btn_predict.click(
         fn=generate_quant_dashboard,

 import yfinance as yf
 from datetime import datetime, timedelta
+from src.inference import predict_horizons
 # --- CÁC HÀM HỖ TRỢ TÍNH TOÁN KỸ THUẬT CHO UI ---
 def calculate_ui_technical_indicators(df):
 def generate_quant_dashboard(ticker, model_name):
     try:
         # 1. Gọi Inference Engine
+        preds, last_close, last_date, _ = predict_horizons(ticker, model_name)
         # 2. Lấy dữ liệu OHLCV 90 ngày để vẽ Candlestick & tính toán Context
         # Sử dụng yfinance trực tiếp để render UI mượt mà, độc lập với backend load_data nặng nề
         rsi_val = last_row['RSI_14']
         macd_h  = last_row['MACD_Hist']
+        base_date = pd.to_datetime(last_date)
+        dates_future = {
+            1: base_date + pd.offsets.BDay(1),
+            7: base_date + pd.offsets.BDay(7),
+            21: base_date + pd.offsets.BDay(21)
+        }
     except Exception as e:
         error_html = f"""<div style='background-color:#3a1010; padding:15px; border-left: 4px solid #ff4d4d; color: #ff8080;'>
     consensus_html = ""
     target_price = 0
+    def get_avg_price(h):
+        if model_name == "Cả Hai":
+            return (preds[h]["Linear Regression"]["pred_price"] + preds[h]["SVR"]["pred_price"]) / 2
+        else:
+            return preds[h][model_name]["pred_price"]
+    target_1d = get_avg_price(1)
+    target_7d = get_avg_price(7)
+    target_21d = get_avg_price(21)
     if model_name == "Cả Hai":
         target_price = (price_lr + price_svr) / 2
         spread_bps = abs(pred_lr - pred_svr) * 10000 # Basis points
             direction = "UNCERTAIN / CHOPPY ⚠️"
         consensus_html = f"""
+    <div style="background:#1a1a24; border: 1px solid #333; padding: 15px; border-radius: 5px; font-family: monospace;">
+        <p style="color:#8892b0; margin:0 0 10px 0; font-size:12px;">LAST CLOSE: {last_date}</p>
+        <h2 style="color:white; margin:0 0 15px 0; border-bottom: 1px solid #333; padding-bottom: 10px;">${last_close:.2f}</h2>
+        <table style="width: 100%; color: #a8b2d1; font-size: 13px;">
+            <tr><td style="padding: 4px 0;">Target T+1 (Day)</td><td style="text-align: right; font-weight: bold; color: #ffd700;">${target_1d:.2f}</td></tr>
+            <tr><td style="padding: 4px 0;">Target T+7 (Week)</td><td style="text-align: right; font-weight: bold; color: #ffaa00;">${target_7d:.2f}</td></tr>
+            <tr><td style="padding: 4px 0;">Target T+21 (Month)</td><td style="text-align: right; font-weight: bold; color: #ff5500;">${target_21d:.2f}</td></tr>
+        </table>
+    </div>
+    """
     else:
         active_pred = pred_lr if model_name == "Linear Regression" else pred_svr
         target_price = price_lr if model_name == "Linear Regression" else price_svr
         dir_color = "#00ff00" if active_pred > 0 else "#ff3333"
         dir_text = "BULLISH 📈" if active_pred > 0 else "BEARISH 📉"
         consensus_html = f"""
+    <div style="background:#1a1a24; border: 1px solid #333; padding: 15px; border-radius: 5px; font-family: monospace;">
+        <p style="color:#8892b0; margin:0 0 10px 0; font-size:12px;">LAST CLOSE: {last_date}</p>
+        <h2 style="color:white; margin:0 0 15px 0; border-bottom: 1px solid #333; padding-bottom: 10px;">${last_close:.2f}</h2>
+        <table style="width: 100%; color: #a8b2d1; font-size: 13px;">
+            <tr><td style="padding: 4px 0;">Target T+1 (Day)</td><td style="text-align: right; font-weight: bold; color: #ffd700;">${target_1d:.2f}</td></tr>
+            <tr><td style="padding: 4px 0;">Target T+7 (Week)</td><td style="text-align: right; font-weight: bold; color: #ffaa00;">${target_7d:.2f}</td></tr>
+            <tr><td style="padding: 4px 0;">Target T+21 (Month)</td><td style="text-align: right; font-weight: bold; color: #ff5500;">${target_21d:.2f}</td></tr>
+        </table>
+    </div>
+    """
     # 4. Market Context Panel (Technical Stats)
     rsi_color = "#ff3333" if rsi_val > 70 else ("#00ff00" if rsi_val < 30 else "#a8b2d1")
     # 5. Vẽ biểu đồ Plotly cấp độ Institutional
     fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                         vertical_spacing=0.03, row_heights=[0.75, 0.25],
+                        subplot_titles=(f"{ticker} - MULTI-HORIZON PROJECTIONS", "VOLUME"))
     # Candlestick
     fig.add_trace(go.Candlestick(
     # Volume subplot
     colors = ['#00ff00' if row['Close'] >= row['Open'] else '#ff3333' for _, row in df_ui.iterrows()]
     fig.add_trace(go.Bar(x=df_ui['Date'], y=df_ui['Volume'], marker_color=colors, name='Volume'), row=2, col=1)
+    x_future = [base_date, dates_future[1], dates_future[7], dates_future[21]]
     # --- Thêm điểm dự báo và Confidence Interval (Error Bands) dựa trên ATR ---
     if model_name in ["Linear Regression", "Cả Hai"]:
+        y_lr = [last_close, preds[1]["Linear Regression"]["pred_price"],
+                preds[7]["Linear Regression"]["pred_price"], preds[21]["Linear Regression"]["pred_price"]]
+        fig.add_trace(go.Scatter(x=x_future, y=y_lr, mode='lines+markers', name='LR Trajectory',
+            line=dict(color='#ff00ff', dash='dot'), marker=dict(size=8, symbol='diamond')), row=1, col=1)
     if model_name in ["SVR", "Cả Hai"]:
+        y_svr = [last_close, preds[1]["SVR"]["pred_price"],
+                 preds[7]["SVR"]["pred_price"], preds[21]["SVR"]["pred_price"]]
+        fig.add_trace(go.Scatter(x=x_future, y=y_svr, mode='lines+markers', name='SVR Trajectory',
+            line=dict(color='#00ffff', dash='dot'), marker=dict(size=8, symbol='diamond')), row=1, col=1)
+    upper_band = [last_close, target_1d + atr_val*np.sqrt(1), target_7d + atr_val*np.sqrt(7), target_21d + atr_val*np.sqrt(21)]
+    lower_band = [last_close, target_1d - atr_val*np.sqrt(1), target_7d - atr_val*np.sqrt(7), target_21d - atr_val*np.sqrt(21)]
     # Risk cone: starts at last_close, then target price ± ATR·√h at each horizon h (1, 7, 21)
+    fig.add_trace(go.Scatter(x=x_future, y=upper_band, mode='lines', name='Risk Cone Upper', line=dict(color='rgba(255, 255, 255, 0.2)')), row=1, col=1)
+    fig.add_trace(go.Scatter(x=x_future, y=lower_band, mode='lines', fill='tonexty', fillcolor='rgba(255, 255, 255, 0.05)', name='Risk Cone Lower', line=dict(color='rgba(255, 255, 255, 0.2)')), row=1, col=1)
     # Tối ưu giao diện Plotly Dark Mode
     fig.update_layout(
         template="plotly_dark",
         plot_bgcolor='#0d0d14', paper_bgcolor='#0d0d14',
         margin=dict(l=40, r=40, t=40, b=40),
 with gr.Blocks(title="Quant Terminal | Stock ML", css=css, theme=gr.themes.Monochrome()) as demo:
     gr.Markdown("""
     <div style="padding: 10px 0; border-bottom: 2px solid #333;">
+        <h1 style="color: #e6e6fa; margin: 0; font-family: monospace;">⚡ QUANTRONIC ML TERMINAL v2.0</h1>
         <p style="color: #8892b0; margin: 0; font-family: monospace;">SVR & Ridge Regression Predictive Analytics Engine</p>
     </div>
     """)
         # MAIN AREA (Charts)
         with gr.Column(scale=3):
+            plot_chart = gr.Plot(height=700)
     btn_predict.click(
         fn=generate_quant_dashboard,

src/data_processing.py CHANGED Viewed

@@ -88,7 +88,7 @@ def validate_data(df, stage="pre_feature"):
     print(f"Validation passed at {stage} (no critical issues).")
     return df
-def generate_technical_features(df, is_inference=False):
     """
     Feature Engineering hoàn toàn mới theo 5 yêu cầu:
     1. Corporate actions đã được xử lý ở load_data (auto_adjust=True)
@@ -181,17 +181,17 @@ def generate_technical_features(df, is_inference=False):
     data = pd.concat(data_list, ignore_index=True)
     if not is_inference:
-        data['Target_Return'] = data.groupby('Ticker')['Daily_Return'].shift(-1)
         data = data.dropna().reset_index(drop=True)
         # === 5. DATA VALIDATION TRƯỚC KHI TRẢ VỀ ===
-        data = validate_data(data, stage="post_feature_engineering")
         df_backtest = data.copy()
         drop_cols = ['Date', 'Ticker', 'Market_Close', 'Target_Return']
         X = data.drop(columns=drop_cols, errors='ignore')
         y = data['Target_Return'].copy()
-        print(f"Generated stationary features & prepared ML data:\n"
             f"   • Total rows: {len(data)} | Tickers: {data['Ticker'].nunique()}\n"
             f"   • Features: {X.shape[1]} | X shape: {X.shape} | y shape: {y.shape}")

     print(f"Validation passed at {stage} (no critical issues).")
     return df
+def generate_technical_features(df, is_inference=False, target_horizon=1):
     """
     Feature Engineering hoàn toàn mới theo 5 yêu cầu:
     1. Corporate actions đã được xử lý ở load_data (auto_adjust=True)
     data = pd.concat(data_list, ignore_index=True)
     if not is_inference:
+        data['Target_Return'] = data.groupby('Ticker')['Close'].shift(-target_horizon) / data['Close'] - 1
         data = data.dropna().reset_index(drop=True)
         # === 5. DATA VALIDATION TRƯỚC KHI TRẢ VỀ ===
+        data = validate_data(data, f"post_feature_engineering_h{target_horizon}")
         df_backtest = data.copy()
         drop_cols = ['Date', 'Ticker', 'Market_Close', 'Target_Return']
         X = data.drop(columns=drop_cols, errors='ignore')
         y = data['Target_Return'].copy()
+        print(f"Generated data for Horizon {target_horizon} days:\n"
             f"   • Total rows: {len(data)} | Tickers: {data['Ticker'].nunique()}\n"
             f"   • Features: {X.shape[1]} | X shape: {X.shape} | y shape: {y.shape}")

src/inference.py CHANGED Viewed

@@ -7,7 +7,7 @@ from src.data_processing import load_data, clean_data, generate_technical_featur
 REPO_ID = "Reality8081/Predict_Stock_SVR_Linear" # << THAY ĐỔI DÒNG NÀY TƯƠNG TỰ
 MARKET_SYMBOL = "^GSPC"
 # Tự động tải models từ Hugging Face nếu chưa có tại local
 def download_model_if_not_exists(filename):
     local_path = os.path.join("models", filename)
@@ -17,8 +17,7 @@ def download_model_if_not_exists(filename):
         return path
     return local_path
-def predict_next_day(ticker, model_name):
-    # Lấy data 150 ngày gần nhất để tính đủ các window (SMA 100 cần ít nhất 100 nến)
     end_date = datetime.now()
     start_date = end_date - timedelta(days=150)
@@ -26,36 +25,33 @@ def predict_next_day(ticker, model_name):
     df_clean = clean_data(df_raw)
     df_features, X, _ = generate_technical_features(df_clean, is_inference=True)
-    if len(X) == 0:
-        raise ValueError(f"Không đủ dữ liệu cho {ticker} để tạo đặc trưng.")
-    # Lấy dòng cuối cùng (ngày giao dịch gần nhất)
     latest_X = X.iloc[[-1]]
     latest_data = df_features.iloc[-1]
     last_close = latest_data['Close']
     last_date = latest_data['Date'].strftime('%Y-%m-%d')
-    predictions = {}
-    if model_name in ["Linear Regression", "Cả Hai"]:
-        scaler_lr = joblib.load(download_model_if_not_exists('scaler_Linear.pkl'))
-        model_lr = joblib.load(download_model_if_not_exists('trained_model_Linear.pkl'))
-        pred_return_lr = model_lr.predict(scaler_lr.transform(latest_X))[0]
-        predictions["Linear Regression"] = {
-            "pred_return": pred_return_lr,
-            "pred_price": last_close * (1 + pred_return_lr)
-        }
-    if model_name in ["SVR", "Cả Hai"]:
-        scaler_svr = joblib.load(download_model_if_not_exists('scaler_SVR.pkl'))
-        model_svr = joblib.load(download_model_if_not_exists('trained_model_SVR.pkl'))
-        pred_return_svr = model_svr.predict(scaler_svr.transform(latest_X))[0]
-        predictions["SVR"] = {
-            "pred_return": pred_return_svr,
-            "pred_price": last_close * (1 + pred_return_svr)
-        }
-    # Lịch sử giá 30 phiên để vẽ biểu đồ
     historical_30 = df_features[['Date', 'Close']].tail(30)
     return predictions, last_close, last_date, historical_30

 REPO_ID = "Reality8081/Predict_Stock_SVR_Linear" # << THAY ĐỔI DÒNG NÀY TƯƠNG TỰ
 MARKET_SYMBOL = "^GSPC"
+HORIZONS = [1, 7, 21]
 # Tự động tải models từ Hugging Face nếu chưa có tại local
 def download_model_if_not_exists(filename):
     local_path = os.path.join("models", filename)
         return path
     return local_path
+def predict_horizons(ticker, model_name):
     end_date = datetime.now()
     start_date = end_date - timedelta(days=150)
     df_clean = clean_data(df_raw)
     df_features, X, _ = generate_technical_features(df_clean, is_inference=True)
+    if len(X) == 0: raise ValueError(f"Không đủ dữ liệu cho {ticker}.")
     latest_X = X.iloc[[-1]]
     latest_data = df_features.iloc[-1]
     last_close = latest_data['Close']
     last_date = latest_data['Date'].strftime('%Y-%m-%d')
+    predictions = {1: {}, 7: {}, 21: {}}
+    for h in HORIZONS:
+        if model_name in ["Linear Regression", "Cả Hai"]:
+            scaler_lr = joblib.load(download_model_if_not_exists(f'scaler_lr_{h}d.pkl'))
+            model_lr = joblib.load(download_model_if_not_exists(f'model_lr_{h}d.pkl'))
+            pred_return_lr = model_lr.predict(scaler_lr.transform(latest_X))[0]
+            predictions[h]["Linear Regression"] = {
+                "pred_return": pred_return_lr,
+                "pred_price": last_close * (1 + pred_return_lr)
+            }
+        if model_name in ["SVR", "Cả Hai"]:
+            scaler_svr = joblib.load(download_model_if_not_exists(f'scaler_svr_{h}d.pkl'))
+            model_svr = joblib.load(download_model_if_not_exists(f'model_svr_{h}d.pkl'))
+            pred_return_svr = model_svr.predict(scaler_svr.transform(latest_X))[0]
+            predictions[h]["SVR"] = {
+                "pred_return": pred_return_svr,
+                "pred_price": last_close * (1 + pred_return_svr)
+            }
     historical_30 = df_features[['Date', 'Close']].tail(30)
     return predictions, last_close, last_date, historical_30

src/train.py CHANGED Viewed

@@ -17,104 +17,105 @@ MARKET_SYMBOL = "^GSPC"
 START_DATE = "2010-01-01"
 END_DATE = datetime.now().strftime('%Y-%m-%d')
 REPO_ID = "Reality8081/Predict_Stock_SVR_Linear" # << THAY ĐỔI DÒNG NÀY
 def main():
     print("1. Đang tải và làm sạch dữ liệu...")
     df_raw = load_data(SYMBOLS, MARKET_SYMBOL, START_DATE, END_DATE)
     df_clean = clean_data(df_raw)
-    print("2. Tạo đặc trưng (Features)...")
-    _, X, y = generate_technical_features(df_clean, is_inference=False)
-    tscv = TimeSeriesSplit(n_splits=5)
-    # === TỐI ƯU LINEAR REGRESSION (RIDGE) ===
-    print("3. Tối ưu siêu tham số Ridge Regression...")
-    def objective_lr(trial):
-        alpha = trial.suggest_float('alpha', 1e-4, 1e4, log=True)
         tscv = TimeSeriesSplit(n_splits=5)
-        fold_scores = []
-        for train_idx, val_idx in tscv.split(X):
-            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-            y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
-            scaler = StandardScaler()
-            X_train_scaled = scaler.fit_transform(X_train)
-            X_val_scaled = scaler.transform(X_val)
-            model = Ridge(alpha=alpha, random_state=42)
-            model.fit(X_train_scaled, y_train)
-            preds = model.predict(X_val_scaled)
-            rmse = np.sqrt(mean_squared_error(y_val, preds))
-            fold_scores.append(rmse)
-        return np.mean(fold_scores)
-    study_lr = optuna.create_study(direction='minimize')
-    study_lr.optimize(objective_lr, n_trials=20)
-    best_alpha = study_lr.best_params['alpha']
-    # === TỐI ƯU SVR ===
-    print("4. Tối ưu siêu tham số SVR...")
-    def objective_svr(trial):
-        # Chỉ tối ưu siêu tham số SVR
-        kernel = trial.suggest_categorical('kernel', ['linear', 'rbf'])
-        C = trial.suggest_float('C', 1e-3, 100.0, log=True)
-        epsilon = trial.suggest_float('epsilon', 1e-3, 1.0, log=True)
-        gamma = trial.suggest_categorical('gamma', ['scale', 'auto']) if kernel == 'rbf' else 'scale'
-        # Chuẩn bị data với feature cố định
-        tscv = TimeSeriesSplit(n_splits=5)
-        fold_scores = []
-        for train_idx, val_idx in tscv.split(X):
-            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-            y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
-            scaler = StandardScaler()
-            X_train_scaled = scaler.fit_transform(X_train)
-            X_val_scaled = scaler.transform(X_val)
-            X_train_scaled = X_train_scaled.astype('float32')
-            X_val_scaled   = X_val_scaled.astype('float32')
-            y_train_f32    = y_train.values.astype('float32')
-            y_val_f32      = y_val.values.astype('float32')
-            model = SVR(kernel=kernel, C=C, epsilon=epsilon, gamma=gamma, max_iter=5000)
-            model.fit(X_train_scaled, y_train)
-            preds = model.predict(X_val_scaled)
-            rmse = np.sqrt(mean_squared_error(y_val, preds))
-            fold_scores.append(rmse)
-        return np.mean(fold_scores)
-    study_svr = optuna.create_study(direction='minimize')
-    study_svr.optimize(objective_svr, n_trials=10) # Set số trial vừa phải
-    # === HUẤN LUYỆN MODEL CUỐI CÙNG & LƯU LẠI ===
-    print("5. Huấn luyện mô hình cuối và lưu trữ...")
-    os.makedirs("models", exist_ok=True)
-    # Ridge
-    scaler_lr = StandardScaler()
-    X_scaled_lr = scaler_lr.fit_transform(X)
-    model_lr = Ridge(alpha=best_alpha, random_state=42)
-    model_lr.fit(X_scaled_lr, y)
-    joblib.dump(scaler_lr, 'models/scaler_lr.pkl')
-    joblib.dump(model_lr, 'models/model_lr.pkl')
-    # SVR
-    scaler_svr = StandardScaler()
-    X_scaled_svr = scaler_svr.fit_transform(X)
-    model_svr = SVR(kernel='rbf', C=study_svr.best_params['C'], epsilon=study_svr.best_params['epsilon'], gamma='scale')
-    model_svr.fit(X_scaled_svr, y)
-    joblib.dump(scaler_svr, 'models/scaler_svr.pkl')
-    joblib.dump(model_svr, 'models/model_svr.pkl')
     print("6. Tải mô hình lên Hugging Face Hub...")
     hf_token = os.environ.get("HF_TOKEN")

 START_DATE = "2010-01-01"
 END_DATE = datetime.now().strftime('%Y-%m-%d')
 REPO_ID = "Reality8081/Predict_Stock_SVR_Linear" # << THAY ĐỔI DÒNG NÀY
+HORIZONS = [1, 7, 21]
 def main():
     print("1. Đang tải và làm sạch dữ liệu...")
     df_raw = load_data(SYMBOLS, MARKET_SYMBOL, START_DATE, END_DATE)
     df_clean = clean_data(df_raw)
+    os.makedirs("models", exist_ok=True)
+    for h in HORIZONS:
+        print("2. Tạo đặc trưng (Features)...")
+        _, X, y = generate_technical_features(df_clean, is_inference=False, target_horizon=h)
         tscv = TimeSeriesSplit(n_splits=5)
+        # === TỐI ƯU LINEAR REGRESSION (RIDGE) ===
+        print("3. Tối ưu siêu tham số Ridge Regression...")
+        def objective_lr(trial):
+            alpha = trial.suggest_float('alpha', 1e-4, 1e4, log=True)
+            tscv = TimeSeriesSplit(n_splits=5)
+            fold_scores = []
+            for train_idx, val_idx in tscv.split(X):
+                X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
+                y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
+                scaler = StandardScaler()
+                X_train_scaled = scaler.fit_transform(X_train)
+                X_val_scaled = scaler.transform(X_val)
+                model = Ridge(alpha=alpha, random_state=42)
+                model.fit(X_train_scaled, y_train)
+                preds = model.predict(X_val_scaled)
+                rmse = np.sqrt(mean_squared_error(y_val, preds))
+                fold_scores.append(rmse)
+            return np.mean(fold_scores)
+        study_lr = optuna.create_study(direction='minimize')
+        study_lr.optimize(objective_lr, n_trials=20)
+        best_alpha = study_lr.best_params['alpha']
+        # === TỐI ƯU SVR ===
+        print("4. Tối ưu siêu tham số SVR...")
+        def objective_svr(trial):
+            # Chỉ tối ưu siêu tham số SVR
+            kernel = trial.suggest_categorical('kernel', ['linear', 'rbf'])
+            C = trial.suggest_float('C', 1e-3, 100.0, log=True)
+            epsilon = trial.suggest_float('epsilon', 1e-3, 1.0, log=True)
+            gamma = trial.suggest_categorical('gamma', ['scale', 'auto']) if kernel == 'rbf' else 'scale'
+            # Chuẩn bị data với feature cố định
+            tscv = TimeSeriesSplit(n_splits=5)
+            fold_scores = []
+            for train_idx, val_idx in tscv.split(X):
+                X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
+                y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
+                scaler = StandardScaler()
+                X_train_scaled = scaler.fit_transform(X_train)
+                X_val_scaled = scaler.transform(X_val)
+                X_train_scaled = X_train_scaled.astype('float32')
+                X_val_scaled   = X_val_scaled.astype('float32')
+                y_train_f32    = y_train.values.astype('float32')
+                y_val_f32      = y_val.values.astype('float32')
+                model = SVR(kernel=kernel, C=C, epsilon=epsilon, gamma=gamma, max_iter=5000)
+                model.fit(X_train_scaled, y_train)
+                preds = model.predict(X_val_scaled)
+                rmse = np.sqrt(mean_squared_error(y_val, preds))
+                fold_scores.append(rmse)
+            return np.mean(fold_scores)
+        study_svr = optuna.create_study(direction='minimize')
+        study_svr.optimize(objective_svr, n_trials=10) # Set số trial vừa phải
+        # === HUẤN LUYỆN MODEL CUỐI CÙNG & LƯU LẠI ===
+        print("5. Huấn luyện mô hình cuối và lưu trữ...")
+        os.makedirs("models", exist_ok=True)
+        # Ridge
+        scaler_lr = StandardScaler()
+        X_scaled_lr = scaler_lr.fit_transform(X)
+        model_lr = Ridge(alpha=best_alpha, random_state=42)
+        model_lr.fit(X_scaled_lr, y)
+        joblib.dump(scaler_lr, f'models/scaler_lr_{h}d.pkl')
+        joblib.dump(model_lr, f'models/model_lr_{h}d.pkl')
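+        # SVR — NOTE(review): the final fit hard-codes kernel='rbf' and gamma='scale'; only C and epsilon come from study_svr.best_params, although the study also searches 'kernel' and 'gamma'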
+        scaler_svr = StandardScaler()
+        X_scaled_svr = scaler_svr.fit_transform(X)
+        model_svr = SVR(kernel='rbf', C=study_svr.best_params['C'], epsilon=study_svr.best_params['epsilon'], gamma='scale')
+        model_svr.fit(X_scaled_svr, y)
+        joblib.dump(scaler_svr, f'models/scaler_svr_{h}d.pkl')
+        joblib.dump(model_svr, f'models/model_svr_{h}d.pkl')
     print("6. Tải mô hình lên Hugging Face Hub...")
     hf_token = os.environ.get("HF_TOKEN")