Spaces:

nafisneehal
/

trade-mind

Runtime error

App Files Files Community

nafisneehal committed on Nov 12, 2024

Commit

73ac9f6

verified ·

1 Parent(s): 2d1e9e0

Upload 3 files

Browse files

Files changed (3) hide show

Trainer.py +235 -0
fetch_plot_data.py +81 -0
gradio_app.py +155 -0

Trainer.py ADDED Viewed

	@@ -0,0 +1,235 @@

+import hopsworks
+import pandas as pd
+import os
+from datetime import datetime, timedelta
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.model_selection import train_test_split
+import joblib
+from pathlib import Path
+import hsfs
+import hsml
+# Define the base directory as the project root: three directory levels above
+# the folder containing this file. Used below to locate "models/" and
+# "src/training_pipeline/kserve_predict_script.py".
+# NOTE(review): this assumes the file sits three levels deep in the project
+# tree — confirm it still holds where this file is deployed.
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+class Trainer:
+    """Train, register, deploy and query a regressor through Hopsworks.
+    Constructing an instance logs in to Hopsworks and caches handles to the
+    project, its feature store and its model registry. ``feature_view`` is set
+    by ``create_feature_view`` and ``deployment`` by ``model_deploy``.
+    """
+    # Model inputs are the columns "<field>_lag_<lag>" for every lag and field.
+    _LAGS = range(0, 13)
+    _PRICE_FIELDS = ("open", "high", "low", "close")
+    def __init__(self, project_name, feature_group_name, model_registry_name, api_key):
+        """Log in to Hopsworks and cache the feature-store and registry handles.
+        Args:
+            project_name: Hopsworks project name, used to build dataset paths.
+            feature_group_name: Name of the feature group to read (version 1).
+            model_registry_name: Name under which the model is registered.
+            api_key: Hopsworks API key passed to ``hopsworks.login``.
+        """
+        self.project_name = project_name
+        self.feature_group_name = feature_group_name
+        self.model_registry_name = model_registry_name
+        self.api_key = api_key
+        self.project = hopsworks.login(api_key_value=self.api_key)
+        self.fs = self.project.get_feature_store()
+        self.model_registry = self.project.get_model_registry()
+        self.feature_view = None
+        self.deployment = None
+    @classmethod
+    def _feature_columns(cls):
+        """Return the lagged input column names, ordered by lag then field."""
+        return [
+            f"{field}_lag_{lag}" for lag in cls._LAGS for field in cls._PRICE_FIELDS
+        ]
+    def _deployment_name(self):
+        """Return the deployment name: the registry name with underscores removed."""
+        # NOTE(review): presumably required by the deployment-name pattern
+        # enforced by Hopsworks — confirm against the serving documentation.
+        return self.model_registry_name.replace("_", "")
+    def _require_feature_view(self):
+        """Return the feature view, failing clearly if it was never created."""
+        if self.feature_view is None:
+            raise RuntimeError(
+                "Feature view is not available; call create_feature_view() first.")
+        return self.feature_view
+    def _get_recent_data(self, window):
+        """Fetch the rows between ``now - window`` and now, newest first.
+        Args:
+            window: ``timedelta`` giving the size of the look-back window.
+        Returns:
+            DataFrame sorted by the ``datetime`` column in descending order.
+        """
+        # Take a single clock reading so both bounds are derived from it.
+        end_time = datetime.now()
+        start_time = end_time - window
+        df = self._require_feature_view().get_batch_data(
+            start_time=start_time, end_time=end_time)
+        return df.sort_values(by='datetime', ascending=False)
+    def create_feature_view(self):
+        """Select all features of the feature group and get or create its view.
+        On a Hopsworks REST error the error is printed and ``feature_view`` is
+        left unchanged.
+        """
+        selected_features = self.fs.get_or_create_feature_group(
+            name=self.feature_group_name,
+            version=1
+        ).select_all()
+        print("Feature group selected successfully......... --->>")
+        # The view itself is built from every feature; the time window is
+        # applied later, when batch data is read from the view.
+        try:
+            self.feature_view = self.fs.get_or_create_feature_view(
+                name=f"{self.feature_group_name}_view",
+                version=1,
+                description="Feature view with last 30 days of data for model training",
+                query=selected_features,
+            )
+            print("Feature view created or retrieved successfully.")
+        except hsfs.client.exceptions.RestAPIError as e:
+            print(f"Error creating feature view: {e}")
+    def delete_feature_view(self):
+        """Delete the feature view, if one has been created.
+        Does nothing (beyond printing a notice) when no feature view is set.
+        """
+        if self.feature_view is None:
+            print("No feature view to delete.")
+            return
+        try:
+            self.feature_view.delete()
+            # Drop the stale handle so later calls do not use a deleted view.
+            self.feature_view = None
+            print("Feature view deleted successfully.")
+        except hsfs.client.exceptions.RestAPIError as e:
+            print(f"Error deleting feature view: {e}")
+    def get_retrain_data_from_feature_view(self):
+        """Pull the last 30 days of data from the feature view, newest first.
+        Raises:
+            RuntimeError: If the feature view has not been created.
+        """
+        df = self._get_recent_data(timedelta(days=30))
+        print("Data pulled from feature view for retraining successfully.")
+        return df
+    def get_plot_data_from_feature_view(self, hours):
+        """Pull the last ``hours`` hours of data from the feature view, newest first.
+        Args:
+            hours: Size of the look-back window, in hours.
+        Raises:
+            RuntimeError: If the feature view has not been created.
+        """
+        df = self._get_recent_data(timedelta(hours=hours))
+        print("Data pulled from feature view for plotting successfully.")
+        return df
+    def train_test_split(self, df, test_size=0.2):
+        """Split ``df`` into random train and test subsets.
+        Args:
+            df: DataFrame holding the lagged feature columns and ``target``.
+            test_size: Fraction of rows assigned to the test set.
+        Returns:
+            Tuple ``(X_train, X_test, y_train, y_test)``.
+        """
+        X, y = self.get_features_labels(df)
+        # Inside a method body the bare name resolves to the module-level
+        # scikit-learn function, not to this method of the same name.
+        # NOTE(review): the split shuffles rows; confirm that is acceptable
+        # for time-ordered data.
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=42)
+        print("Data split into train and test sets.")
+        return X_train, X_test, y_train, y_test
+    def get_features_labels(self, df):
+        """Split ``df`` into the lagged feature columns and the ``target`` column.
+        Returns:
+            Tuple ``(X, y)``: the feature DataFrame and the target Series.
+        """
+        X = df[self._feature_columns()]
+        y = df['target']
+        return X, y
+    def train_model(self, model, X_train, y_train):
+        """Fit ``model`` on the training data and return the same model object."""
+        model.fit(X_train, y_train)
+        print("Model training completed.")
+        return model
+    def evaluate_model(self, model, X_test, y_test, **kwargs):
+        """Evaluate the model on the hold-out test set.
+        Args:
+            model: Fitted estimator exposing ``predict``.
+            X_test: Test features.
+            y_test: True target values.
+            **kwargs: Pass ``show_pred=True`` to also print the predictions.
+        Returns:
+            dict with keys ``"mse"``, ``"mae"`` and ``"r2"``.
+        """
+        y_pred = model.predict(X_test)
+        # Honour the flag's value, not just its presence: show_pred=False
+        # must not print.
+        if kwargs.get("show_pred"):
+            print(f"Predictions: {y_pred}")
+        mse = mean_squared_error(y_test, y_pred)
+        mae = mean_absolute_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        print(f"Model Evaluation:\nMSE: {mse}\nMAE: {mae}\nR2 Score: {r2}")
+        return {"mse": mse, "mae": mae, "r2": r2}
+    def save_model_to_registry(self, model, metrics, model_schema, X_train):
+        """Serialize the model locally and register it in the Hopsworks registry.
+        Args:
+            model: Trained model, dumped with joblib.
+            metrics: Metrics dict stored alongside the model.
+            model_schema: Model schema passed to the registry.
+            X_train: Training features; one sampled row is the input example.
+        """
+        model_dir = BASE_DIR / "models"
+        # exist_ok makes a separate existence check unnecessary.
+        model_dir.mkdir(parents=True, exist_ok=True)
+        model_path = model_dir / f"{self.model_registry_name}.pkl"
+        joblib.dump(model, model_path)
+        new_model = self.model_registry.sklearn.create_model(
+            name=self.model_registry_name,
+            metrics=metrics,
+            model_schema=model_schema,
+            input_example=X_train.sample(),
+            description="Trained model with 30-day feature view data",
+        )
+        # Upload the serialized model file to the registry.
+        new_model.save(str(model_path))
+        print("Model saved to registry successfully.")
+    def model_deploy(self):
+        """Upload the predictor script, deploy the registered model and start it.
+        The created deployment is stored on ``self.deployment``.
+        """
+        model = self.model_registry.get_model(
+            self.model_registry_name)
+        deploy_name = self._deployment_name()
+        dataset_api = self.project.get_dataset_api()
+        # Upload the predictor script to the "Models" dataset, overwriting any
+        # file of the same name. The path is passed as a string.
+        predictor_local_path = BASE_DIR / "src" / \
+            "training_pipeline" / "kserve_predict_script.py"
+        uploaded_file_path = dataset_api.upload(
+            str(predictor_local_path), "Models", overwrite=True)
+        # Construct the full path to the uploaded predictor script
+        predictor_script_path = os.path.join(
+            "/Projects", self.project_name, uploaded_file_path)
+        self.deployment = model.deploy(
+            name=deploy_name,
+            script_file=predictor_script_path,)
+        self.deployment.start()
+    def predict_with_hopsworks_api(self, X):
+        """Use the deployed model to make predictions via the Hopsworks API.
+        Args:
+            X: DataFrame of input rows; sent as a list of row lists.
+        Returns:
+            The deployment's prediction response, or ``None`` if the request
+            failed (the error is printed).
+        """
+        model_serving = self.project.get_model_serving()
+        deploy_name = self._deployment_name()
+        try:
+            deployment = model_serving.get_deployment(name=deploy_name)
+            predictions = deployment.predict(inputs=X.values.tolist())
+            print("Predictions made via Hopsworks model API.")
+            return predictions
+        except hsml.client.exceptions.RestAPIError as e:
+            print(f"Error making predictions: {e}")
+            return None
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            return None
+    def stop_model_deployment(self):
+        """Force-delete this model's deployment, if it exists.
+        Returns:
+            The registry model (version 1) whose deployments were inspected.
+        """
+        model = self.model_registry.get_model(
+            self.model_registry_name, version=1)
+        deploy_name = self._deployment_name()
+        model_serving = self.project.get_model_serving()
+        try:
+            deployments = model_serving.get_deployments(model)
+            for deployment in deployments:
+                if deployment.name == deploy_name:
+                    deployment.delete(force=True)
+                    print(
+                        f"Deployment {deploy_name} stopped and deleted successfully.")
+                    break
+            else:
+                # for/else: runs only when no deployment matched the name.
+                print(f"No deployment found with name: {deploy_name}")
+        except hsml.client.exceptions.RestAPIError as e:
+            print(f"Error stopping or deleting deployment: {e}")
+        return model

fetch_plot_data.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import pprint
+from dotenv import load_dotenv
+import yaml
+from pathlib import Path
+from Trainer import Trainer  # Assuming Trainer.py is in the same directory
+import requests
+import os
+import json
+import warnings
+import pandas as pd
+import hsml
+# Silence all warnings for this process.
+warnings.filterwarnings('ignore')
+# Load variables from a local .env file (if any) into the environment.
+load_dotenv()
+# Hopsworks API key, read from the environment. Keep the key out of source
+# control rather than hard-coding it here.
+HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")
+# Define the base directory as the project root
+BASE_DIR = Path(__file__).resolve().parent.parent.parent
+# Use BASE_DIR to dynamically load the config file
+CONFIG_FILE = BASE_DIR / "src" / "config.yml"
+with open(CONFIG_FILE, 'r', encoding="utf-8") as file:
+    configs = yaml.safe_load(file)
+symbol = configs['stock_api_params']['symbol']
+# Lower-cased part of the symbol before the first "/", used as the prefix of
+# the feature group and model names.
+_symbol_prefix = symbol.split('/')[0].lower()
+# Module-level Trainer shared by the functions below. Constructing it logs in
+# to Hopsworks, so importing this module performs network I/O.
+trainer = Trainer(
+    project_name=configs['hopsworks']['project_name'],
+    feature_group_name=f"{_symbol_prefix}_features",
+    model_registry_name=f"{_symbol_prefix}_regressor_model",
+    api_key=HOPSWORKS_API_KEY
+)
+def return_plot_data(hours):
+    """Fetch the last ``hours`` hours of rows and split them for plotting.
+    Returns:
+        Tuple ``(features, labels, datetimes)`` taken from the fetched rows.
+    """
+    # The feature view must exist before batch data can be read from it.
+    trainer.create_feature_view()
+    recent_rows = trainer.get_plot_data_from_feature_view(hours)
+    features, labels = trainer.get_features_labels(recent_rows)
+    return features, labels, recent_rows['datetime']
+def return_plot_data_prediction(input_features):
+    """Return the deployed model's predictions for ``input_features``."""
+    return trainer.predict_with_hopsworks_api(input_features)
+def get_plot_data(hours):
+    """Collect features, labels, predictions and timestamps for plotting.
+    Args:
+        hours: Size of the look-back window, in hours.
+    Returns:
+        dict with keys "features", "labels", "prediction" and "datetime".
+    Raises:
+        RuntimeError: If the prediction call produced no result (it returns
+            None after printing the error when the serving request fails).
+    """
+    input_features, input_labels, datetime_column = return_plot_data(
+        hours)
+    prediction = return_plot_data_prediction(input_features)
+    if prediction is None:
+        # Fail with an actionable message instead of the opaque
+        # "'NoneType' object is not subscriptable" TypeError on the lookup below.
+        raise RuntimeError(
+            "Prediction request failed; check that the model deployment is running.")
+    return {"features": input_features, "labels": input_labels,
+            "prediction": prediction['predictions'], "datetime": datetime_column}
+# f, l, d = return_plot_data()
+# print(f)
+# print(l)
+# print(trainer.predict_with_hopsworks_api(f))
+# # Example input data (replace with your actual input structure)
+# input_ls = [76480.91, 76648.94, 76390.51, 76541.99, 76330.78, 76339.94, 76312.67, 76319.28, 76246.58, 76413.26, 76206.41, 76333.14, 76396.64, 76732.32, 76151.9, 76244.62, 76279.09, 76429.21, 76222.1, 76396.63, 76122.3, 76283.43, 75758.58,
+#             76272.1, 76349.99, 76366.2, 76093.0, 76117.98, 76395.53, 76456.16, 76319.87, 76348.18, 76461.01, 76481.48, 76300.38, 76395.53, 76330.91, 76517.26, 76323.53, 76461.02, 76532.39, 76583.19, 76319.32, 76330.91, 76509.82, 76570.6, 76415.72, 76534.61]
+# input_columns = ["open_lag_1", "high_lag_1", "low_lag_1", "close_lag_1", "open_lag_2", "high_lag_2", "low_lag_2", "close_lag_2", "open_lag_3", "high_lag_3", "low_lag_3", "close_lag_3", "open_lag_4", "high_lag_4", "low_lag_4", "close_lag_4", "open_lag_5", "high_lag_5", "low_lag_5", "close_lag_5", "open_lag_6", "high_lag_6", "low_lag_6", "close_lag_6",
+#                  "open_lag_7", "high_lag_7", "low_lag_7", "close_lag_7", "open_lag_8", "high_lag_8", "low_lag_8", "close_lag_8", "open_lag_9", "high_lag_9", "low_lag_9", "close_lag_9", "open_lag_10", "high_lag_10", "low_lag_10", "close_lag_10", "open_lag_11", "high_lag_11", "low_lag_11", "close_lag_11", "open_lag_12", "high_lag_12", "low_lag_12", "close_lag_12"]
+# input_df = pd.DataFrame([input_ls], columns=input_columns)

gradio_app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+from fetch_plot_data import get_plot_data
+def get_time_series_data():
+    """Build the long-format chart data and padded y-axis bounds.
+    Fetches the last 24 hours of actual and predicted values, orders them by
+    time and reshapes them to one row per (timestamp, series) pair.
+    Returns:
+        Tuple ``(long_data, y_min, y_max)`` where the bounds are the overall
+        minimum and maximum widened by 0.05% of the value range.
+    """
+    fetched = get_plot_data(hours=24)
+    fetched["datetime"] = pd.to_datetime(fetched["datetime"])
+    wide = pd.DataFrame({
+        "Datetime": fetched["datetime"],
+        "Actual BTC/USD": fetched["labels"],
+        "Predicted BTC/USD": fetched["prediction"]
+    }).sort_values(by="Datetime")
+    # Timestamps become minute-resolution strings for display on the x-axis.
+    wide["Datetime"] = wide["Datetime"].dt.strftime("%Y-%m-%d %H:%M")
+    # Axis bounds span both series.
+    combined = np.concatenate([wide["Actual BTC/USD"],
+                               wide["Predicted BTC/USD"]])
+    lowest = np.min(combined)
+    highest = np.max(combined)
+    margin = (highest - lowest) * 0.0005
+    tidy = wide.melt(
+        id_vars="Datetime",
+        var_name="Series",
+        value_name="BTC/USD Value"
+    )
+    return (tidy, lowest - margin, highest + margin)
+# Stylesheet passed to gr.Blocks(css=...) below. It styles the page body, the
+# Gradio container, and the classes used by the HTML snippets in the layout
+# (main-title, subtitle, chart-container, footer-content, footer-left,
+# developer-info).
+# NOTE(review): .footer-right is defined here but not used by the markup in
+# this file — confirm whether it is still needed.
+custom_css = """
+body {
+    background-color: #f8fafc !important;
+}
+.gradio-container {
+    max-width: 1200px !important;
+    margin: 2rem auto !important;
+    padding: 2rem !important;
+    background-color: white !important;
+    border-radius: 1rem !important;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06) !important;
+}
+.main-title {
+    color: #1e293b !important;
+    font-size: 2.5rem !important;
+    font-weight: 700 !important;
+    text-align: center !important;
+    margin-bottom: 0.5rem !important;
+    line-height: 1.2 !important;
+}
+.subtitle {
+    color: #64748b !important;
+    font-size: 1.125rem !important;
+    text-align: center !important;
+    margin-bottom: 1.5rem !important;
+    font-weight: 500 !important;
+}
+.chart-container {
+    margin-bottom: 1rem !important;
+}
+.footer-content {
+    margin-top: 1rem !important;
+    padding-top: 1rem !important;
+    border-top: 1px solid #e2e8f0 !important;
+    display: flex !important;
+    justify-content: space-between !important;
+    align-items: center !important;
+    color: #64748b !important;
+    font-size: 0.875rem !important;
+}
+.footer-left {
+    text-align: left !important;
+}
+.footer-right {
+    text-align: right !important;
+}
+.developer-info {
+    color: #3b82f6 !important;
+    font-weight: 500 !important;
+    text-decoration: none !important;
+    transition: color 0.2s !important;
+}
+.developer-info:hover {
+    color: #2563eb !important;
+}
+"""
+# Initialize the Gradio app
+# NOTE(review): everything in this block runs once, when the module is
+# executed, so the chart data and the "Last updated" timestamp are computed at
+# startup only — confirm whether a periodic refresh is intended for a "live"
+# view.
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
+    with gr.Column():
+        # Title and subtitle
+        gr.Markdown("""
+            <div class="main-title">Live BTC/USD Time Series Info</div>
+            <div class="subtitle">Predictions served via Hopsworks API</div>
+        """)
+        # Fetch the data and axis bounds used to render the initial chart.
+        initial_data, initial_y_min, initial_y_max = get_time_series_data()
+        # Chart with reduced bottom margin
+        with gr.Column(elem_classes=["chart-container"]):
+            # NOTE(review): the set of keyword arguments LinePlot accepts
+            # depends on the installed Gradio version — verify colors, zoom,
+            # pan, stroke_width, show_grid, y_min and y_max against it. The
+            # same bounds are passed three times (y_min, y_max and y_lim).
+            line_plot = gr.LinePlot(
+                value=initial_data,
+                x="Datetime",
+                y="BTC/USD Value",
+                color="Series",
+                title="",
+                y_title="BTC/USD Value",
+                x_title="Time",
+                x_label_angle=45,
+                width=1000,
+                height=450,  # Slightly reduced height
+                colors={
+                    "Actual BTC/USD": "#3b82f6",
+                    "Predicted BTC/USD": "#ef4444"
+                },
+                tooltip=["Datetime", "BTC/USD Value", "Series"],
+                overlay_point=True,
+                zoom=False,
+                pan=False,
+                show_label=True,
+                stroke_width=2,
+                y_min=initial_y_min,
+                y_max=initial_y_max,
+                y_lim=[initial_y_min, initial_y_max],
+                show_grid=True,
+            )
+        # Footer with timestamp and developer info
+        gr.Markdown(f"""
+            <div class="footer-content">
+                <div class="footer-left">
+                    Last updated: {pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")}
+                    <br>
+                    <a href="https://nafis-neehal.github.io/" target="_blank" class="developer-info">Developed by Nafis Neehal</a>
+                </div>
+            </div>
+        """)
+# Launch the app
+app.launch()