#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Train an Isolation Forest on the OpenML "cardiotocography" data set.

The script downloads the data set, derives a binary label, keeps 75% of the
rows as a training split, fits an ``IsolationForest`` on that split and
writes the fitted model to ``isolation_forest.joblib`` in the current
working directory.
"""
import joblib
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG; the same seed is also passed explicitly to the
# split and to the model so the run is reproducible.
random_seed = 0
np.random.seed(random_seed)

# Load the data set as plain NumPy arrays (as_frame=False) rather than pandas.
dataset_name = "cardiotocography"
dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
X, y = dataset.data, dataset.target

# Binary label: 1 where the raw target equals the string "3", else 0.
# NOTE(review): presumably "3" marks the anomalous class -- confirm against
# the data set description.
s = y == "3"
y = s.astype(int)

# Keep 75% of the rows for training; the held-out 25% is discarded here.
X_train, _, y_train, _ = train_test_split(
    X, y, test_size=0.25, random_state=random_seed
)

# Number of trees in the forest.
n_estimators = 100

# IsolationForest is unsupervised: ``fit`` accepts ``y`` only for API
# consistency and ignores it, so the labels do not influence the model.
model = IsolationForest(n_estimators=n_estimators, random_state=random_seed)
model.fit(X_train, y_train)

# Persist the fitted model to disk.
joblib.dump(model, 'isolation_forest.joblib')