| | |
| | import argparse |
| | import json |
| | from pathlib import Path |
| |
|
| | import joblib |
| | from sklearn.metrics import ( |
| | accuracy_score, |
| | precision_recall_fscore_support, |
| | classification_report |
| | ) |
| |
|
# Project root: this file appears to live one level below it — TODO confirm layout.
BASE_DIR = Path(__file__).resolve().parent.parent
# Serialized model artifacts; load_model expects trained_pipeline.joblib here.
MODELS_DIR = BASE_DIR / "models"
# Datasets root; the CLI default points at data/samples/eval.json under it.
DATA_DIR = BASE_DIR / "data"
| |
|
| |
|
def load_model():
    """Deserialize and return the trained pipeline from MODELS_DIR.

    Raises:
        FileNotFoundError: when trained_pipeline.joblib is absent.
    """
    path = MODELS_DIR / "trained_pipeline.joblib"
    if path.exists():
        return joblib.load(path)
    raise FileNotFoundError(f"Model not found: {path}")
| |
|
| |
|
def load_dataset(dataset_path: Path):
    """Load an evaluation dataset from a JSON file.

    Supported layouts:
      * a top-level list of samples, or
      * an object with a "samples" key holding that list.
    Each sample must be an object with "text" and "label" keys.

    Args:
        dataset_path: Path to the JSON dataset file.

    Returns:
        A ``(texts, labels)`` pair of parallel lists.

    Raises:
        FileNotFoundError: if the file does not exist.
        RuntimeError: if the filename looks like a training split
            (guards against accidentally reporting train-set metrics).
        ValueError: if the JSON structure or any sample is malformed.
    """
    if not dataset_path.exists():
        raise FileNotFoundError(f"Dataset not found: {dataset_path}")

    if dataset_path.name in {"training_data.json", "train.json"}:
        raise RuntimeError(
            f"Refusing to evaluate on training dataset: {dataset_path.name}"
        )

    with dataset_path.open("r", encoding="utf-8") as f:
        raw = json.load(f)

    if isinstance(raw, list):
        samples = raw
    elif isinstance(raw, dict) and "samples" in raw:
        samples = raw["samples"]
    else:
        raise ValueError("Unsupported JSON dataset format")

    texts = []
    labels = []

    for i, item in enumerate(samples):
        # isinstance guard: a non-dict entry (e.g. an int or None) would
        # otherwise raise TypeError on the membership test, and a bare
        # string could pass the substring check and crash on item["text"].
        if not isinstance(item, dict) or "text" not in item or "label" not in item:
            raise ValueError(f"Invalid sample at index {i}: {item}")
        texts.append(item["text"])
        labels.append(item["label"])

    return texts, labels
| |
|
| |
|
def evaluate(model, X, y):
    """Predict on X and print aggregate metrics plus a per-class report.

    Precision/recall/F1 are weighted averages; zero_division=0 avoids
    warnings for labels that receive no predictions.
    """
    predictions = model.predict(X)

    accuracy = accuracy_score(y, predictions)
    prec, rec, f1_val, _ = precision_recall_fscore_support(
        y, predictions, average="weighted", zero_division=0
    )

    banner = "===================================="
    print(banner)
    print("Offline Evaluation Results")
    print(banner)
    print(f"Samples : {len(y)}")
    print(f"Accuracy : {accuracy:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall : {rec:.4f}")
    print(f"F1-score : {f1_val:.4f}")
    print()
    print("Detailed Classification Report")
    print("------------------------------------")
    print(classification_report(y, predictions, zero_division=0))
| |
|
| |
|
def main():
    """CLI entry point: parse arguments, load model and data, evaluate."""
    parser = argparse.ArgumentParser(
        description="Offline evaluation using held-out JSON dataset"
    )
    parser.add_argument(
        "--data",
        default=str(DATA_DIR / "samples" / "eval.json"),
        help="Path to evaluation dataset (default: data/samples/eval.json)",
    )
    opts = parser.parse_args()

    pipeline = load_model()
    texts, labels = load_dataset(Path(opts.data))
    evaluate(pipeline, texts, labels)


if __name__ == "__main__":
    main()
| |
|