Improve IndoHoaxDetector repo: add comprehensive README, model card, examples, evaluation script, tests, license, and fix app.py for hoax detection
af7b60b | #!/usr/bin/env python3 | |
| """ | |
| Evaluation script for IndoHoaxDetector model | |
| This script computes various performance metrics for the hoax detection model. | |
| """ | |
| import pickle | |
| import numpy as np | |
| from sklearn.metrics import ( | |
| accuracy_score, | |
| precision_score, | |
| recall_score, | |
| f1_score, | |
| confusion_matrix, | |
| classification_report | |
| ) | |
def load_model(model_path='logreg_model.pkl'):
    """Deserialize and return the trained hoax-detection model.

    Args:
        model_path: Path to the pickled sklearn model file.

    Returns:
        The unpickled model object.

    NOTE(review): pickle.load can execute arbitrary code embedded in the
    file — only load model files from trusted sources.
    """
    with open(model_path, 'rb') as fh:
        return pickle.load(fh)
def evaluate_model(model, X_test, y_test):
    """
    Evaluate the model on test data.

    Args:
        model: Trained sklearn model
        X_test: Test features (texts)
        y_test: True labels (0: legitimate, 1: hoax)

    Returns:
        dict: Dictionary containing evaluation metrics
    """
    predictions = model.predict(X_test)
    probabilities = model.predict_proba(X_test)

    # Assemble every metric in one literal so nothing can go out of sync
    # between computation and the returned mapping.
    return {
        'accuracy': accuracy_score(y_test, predictions),
        'precision': precision_score(y_test, predictions, average='binary'),
        'recall': recall_score(y_test, predictions, average='binary'),
        'f1_score': f1_score(y_test, predictions, average='binary'),
        'confusion_matrix': confusion_matrix(y_test, predictions),
        'classification_report': classification_report(
            y_test, predictions, target_names=['Legitimate', 'Hoax']
        ),
        'predictions': predictions,
        'probabilities': probabilities,
    }
def print_evaluation_results(results):
    """Print evaluation results in a formatted way.

    Args:
        results: Dict as produced by evaluate_model(); must contain the
            keys 'accuracy', 'precision', 'recall', 'f1_score',
            'confusion_matrix', and 'classification_report'.
    """
    print("=" * 60)
    print("IndoHoaxDetector Model Evaluation Results")
    print("=" * 60)
    # Bug fix: the original printed the literal string ".4f" four times
    # (a botched f-string conversion). Format each metric to 4 decimals.
    print(f"Accuracy:  {results['accuracy']:.4f}")
    print(f"Precision: {results['precision']:.4f}")
    print(f"Recall:    {results['recall']:.4f}")
    print(f"F1 Score:  {results['f1_score']:.4f}")
    print("\nConfusion Matrix:")
    print("[[True Negative, False Positive]")
    print(" [False Negative, True Positive]]")
    print(results['confusion_matrix'])
    print("\nDetailed Classification Report:")
    print(results['classification_report'])
def evaluate_on_sample_data(model):
    """Run a demonstration evaluation on a small hand-labeled sample.

    Args:
        model: Trained sklearn model with predict/predict_proba.

    Returns:
        dict: Metrics dictionary from evaluate_model().
    """
    # (text, label) pairs; label 0 = legitimate, 1 = hoax.
    # In a real scenario this would be your actual test set.
    labeled_samples = [
        ("Pemerintah mengumumkan bantuan sosial untuk masyarakat terdampak pandemi.", 0),
        ("Ditemukan cara instan kaya raya dalam semalam.", 1),
        ("Harga beras stabil di pasaran tradisional.", 0),
        ("Vaksin COVID-19 mengandung chip pelacak.", 1),
        ("Bank Indonesia menaikkan suku bunga acuan.", 0),
        ("Minum air rebusan daun jambu bisa menyembuhkan kanker.", 1),
        ("Jokowi bertemu dengan delegasi dari Amerika Serikat.", 0),
        ("UFO mendarat di Istana Negara dan bertemu presiden.", 1),
    ]
    sample_texts = [text for text, _ in labeled_samples]
    sample_labels = [label for _, label in labeled_samples]

    print("Evaluating on sample data...")
    results = evaluate_model(model, sample_texts, sample_labels)
    print_evaluation_results(results)
    return results
def main():
    """Main evaluation function."""
    separator = "=" * 60

    # Load the pickled model from the default path.
    print("Loading IndoHoaxDetector model...")
    model = load_model()

    # Demonstrate the metrics pipeline on the built-in sample set.
    evaluate_on_sample_data(model)

    print("\n" + separator)
    for line in (
        "To evaluate on your own test data:",
        "1. Prepare your test texts and labels",
        "2. Call evaluate_model(model, X_test, y_test)",
        "3. Use print_evaluation_results(results) to display metrics",
    ):
        print(line)
    print(separator)


if __name__ == "__main__":
    main()