AshenH committed on
Commit
32817e1
·
verified ·
1 Parent(s): 54614e9

Update tools/predict_tool.py

Browse files
Files changed (1) hide show
  1. tools/predict_tool.py +4 -30
tools/predict_tool.py CHANGED
@@ -12,16 +12,7 @@ from utils.tracing import Tracer
12
 
13
  class PredictTool:
14
  """
15
- Loads a sklearn-compatible tabular model artifact from a private/public
16
- Hugging Face repo and runs batch predictions on a DataFrame.
17
- Expects:
18
- - model.pkl
19
- - feature_metadata.json (optional but recommended)
20
- {
21
- "feature_order": ["col1","col2",...],
22
- "prediction_column": "prediction",
23
- "task": "classification" | "regression"
24
- }
25
  """
26
  def __init__(self, cfg: AppConfig, tracer: Tracer):
27
  self.cfg = cfg
@@ -35,23 +26,14 @@ class PredictTool:
35
  if self._model is not None:
36
  return
37
 
38
- token = os.getenv("HF_TOKEN") # OK if None for public repos
39
  repo = self.cfg.hf_model_repo
40
 
41
- model_path = hf_hub_download(
42
- repo_id=repo,
43
- filename="model.pkl",
44
- token=token
45
- )
46
  self._model = joblib.load(model_path)
47
 
48
- # feature metadata is optional; handle gracefully
49
  try:
50
- meta_path = hf_hub_download(
51
- repo_id=repo,
52
- filename="feature_metadata.json",
53
- token=token
54
- )
55
  with open(meta_path, "r", encoding="utf-8") as f:
56
  self._feature_meta = json.load(f) or {}
57
  except Exception:
@@ -62,18 +44,13 @@ class PredictTool:
62
 
63
  def _select_features(self, df: pd.DataFrame) -> pd.DataFrame:
64
  if self._feature_order:
65
- # keep only features in the trained order, ignore extras
66
  missing = [c for c in self._feature_order if c not in df.columns]
67
  if missing:
68
  raise ValueError(f"Missing required features for model: {missing}")
69
  return df[self._feature_order].copy()
70
- # default: use everything present
71
  return df.copy()
72
 
73
  def run(self, df: Optional[pd.DataFrame]) -> pd.DataFrame:
74
- """
75
- If df is None, returns an empty DataFrame.
76
- """
77
  self._ensure_loaded()
78
  if df is None or len(df) == 0:
79
  return pd.DataFrame()
@@ -81,14 +58,11 @@ class PredictTool:
81
  X = self._select_features(df)
82
  model = self._model
83
 
84
- # classification with probabilities preferred
85
  if hasattr(model, "predict_proba"):
86
  preds = model.predict_proba(X)[:, -1]
87
  elif hasattr(model, "decision_function"):
88
- # fallback: map decision function to a score
89
  import numpy as np
90
  raw = model.decision_function(X)
91
- # simple sigmoid to scale-ish if binary
92
  preds = 1 / (1 + np.exp(-raw))
93
  else:
94
  preds = model.predict(X)
 
12
 
13
  class PredictTool:
14
  """
15
+ Loads a sklearn-compatible tabular model from a HF repo and runs predictions.
 
 
 
 
 
 
 
 
 
16
  """
17
  def __init__(self, cfg: AppConfig, tracer: Tracer):
18
  self.cfg = cfg
 
26
  if self._model is not None:
27
  return
28
 
29
+ token = os.getenv("HF_TOKEN")
30
  repo = self.cfg.hf_model_repo
31
 
32
+ model_path = hf_hub_download(repo_id=repo, filename="model.pkl", token=token)
 
 
 
 
33
  self._model = joblib.load(model_path)
34
 
 
35
  try:
36
+ meta_path = hf_hub_download(repo_id=repo, filename="feature_metadata.json", token=token)
 
 
 
 
37
  with open(meta_path, "r", encoding="utf-8") as f:
38
  self._feature_meta = json.load(f) or {}
39
  except Exception:
 
44
 
45
  def _select_features(self, df: pd.DataFrame) -> pd.DataFrame:
46
  if self._feature_order:
 
47
  missing = [c for c in self._feature_order if c not in df.columns]
48
  if missing:
49
  raise ValueError(f"Missing required features for model: {missing}")
50
  return df[self._feature_order].copy()
 
51
  return df.copy()
52
 
53
  def run(self, df: Optional[pd.DataFrame]) -> pd.DataFrame:
 
 
 
54
  self._ensure_loaded()
55
  if df is None or len(df) == 0:
56
  return pd.DataFrame()
 
58
  X = self._select_features(df)
59
  model = self._model
60
 
 
61
  if hasattr(model, "predict_proba"):
62
  preds = model.predict_proba(X)[:, -1]
63
  elif hasattr(model, "decision_function"):
 
64
  import numpy as np
65
  raw = model.decision_function(X)
 
66
  preds = 1 / (1 + np.exp(-raw))
67
  else:
68
  preds = model.predict(X)