Spaces:

AshenH
/

ALM_LLM

Sleeping

App Files Files Community

AshenH committed on Oct 8

Commit

32817e1

verified ·

1 Parent(s): 54614e9

Update tools/predict_tool.py

Browse files

Files changed (1) hide show

tools/predict_tool.py +4 -30

tools/predict_tool.py CHANGED Viewed

@@ -12,16 +12,7 @@ from utils.tracing import Tracer
 class PredictTool:
     """
-    Loads a sklearn-compatible tabular model artifact from a private/public
-    Hugging Face repo and runs batch predictions on a DataFrame.
-    Expects:
-      - model.pkl
-      - feature_metadata.json  (optional but recommended)
-        {
-          "feature_order": ["col1","col2",...],
-          "prediction_column": "prediction",
-          "task": "classification" | "regression"
-        }
     """
     def __init__(self, cfg: AppConfig, tracer: Tracer):
         self.cfg = cfg
@@ -35,23 +26,14 @@ class PredictTool:
         if self._model is not None:
             return
-        token = os.getenv("HF_TOKEN")  # OK if None for public repos
         repo = self.cfg.hf_model_repo
-        model_path = hf_hub_download(
-            repo_id=repo,
-            filename="model.pkl",
-            token=token
-        )
         self._model = joblib.load(model_path)
-        # feature metadata is optional; handle gracefully
         try:
-            meta_path = hf_hub_download(
-                repo_id=repo,
-                filename="feature_metadata.json",
-                token=token
-            )
             with open(meta_path, "r", encoding="utf-8") as f:
                 self._feature_meta = json.load(f) or {}
         except Exception:
@@ -62,18 +44,13 @@ class PredictTool:
     def _select_features(self, df: pd.DataFrame) -> pd.DataFrame:
         if self._feature_order:
-            # keep only features in the trained order, ignore extras
             missing = [c for c in self._feature_order if c not in df.columns]
             if missing:
                 raise ValueError(f"Missing required features for model: {missing}")
             return df[self._feature_order].copy()
-        # default: use everything present
         return df.copy()
     def run(self, df: Optional[pd.DataFrame]) -> pd.DataFrame:
-        """
-        If df is None, returns an empty DataFrame.
-        """
         self._ensure_loaded()
         if df is None or len(df) == 0:
             return pd.DataFrame()
@@ -81,14 +58,11 @@ class PredictTool:
         X = self._select_features(df)
         model = self._model
-        # classification with probabilities preferred
         if hasattr(model, "predict_proba"):
             preds = model.predict_proba(X)[:, -1]
         elif hasattr(model, "decision_function"):
-            # fallback: map decision function to a score
             import numpy as np
             raw = model.decision_function(X)
-            # simple sigmoid to scale-ish if binary
             preds = 1 / (1 + np.exp(-raw))
         else:
             preds = model.predict(X)

 class PredictTool:
     """
+    Loads a sklearn-compatible tabular model from a HF repo and runs predictions.
     """
     def __init__(self, cfg: AppConfig, tracer: Tracer):
         self.cfg = cfg
         if self._model is not None:
             return
+        token = os.getenv("HF_TOKEN")
         repo = self.cfg.hf_model_repo
+        model_path = hf_hub_download(repo_id=repo, filename="model.pkl", token=token)
         self._model = joblib.load(model_path)
         try:
+            meta_path = hf_hub_download(repo_id=repo, filename="feature_metadata.json", token=token)
             with open(meta_path, "r", encoding="utf-8") as f:
                 self._feature_meta = json.load(f) or {}
         except Exception:
     def _select_features(self, df: pd.DataFrame) -> pd.DataFrame:
         if self._feature_order:
             missing = [c for c in self._feature_order if c not in df.columns]
             if missing:
                 raise ValueError(f"Missing required features for model: {missing}")
             return df[self._feature_order].copy()
         return df.copy()
     def run(self, df: Optional[pd.DataFrame]) -> pd.DataFrame:
         self._ensure_loaded()
         if df is None or len(df) == 0:
             return pd.DataFrame()
         X = self._select_features(df)
         model = self._model
         if hasattr(model, "predict_proba"):
             preds = model.predict_proba(X)[:, -1]
         elif hasattr(model, "decision_function"):
             import numpy as np
             raw = model.decision_function(X)
             preds = 1 / (1 + np.exp(-raw))
         else:
             preds = model.predict(X)