#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import joblib
import numpy as np

from sklearn.datasets import fetch_openml
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG once; the same seed is also handed explicitly to
# the split and to the model below.
random_seed = 0
np.random.seed(random_seed)

# Fetch the dataset from OpenML by name (version 1, as_frame=False).
dataset_name = "cardiotocography"
dataset = fetch_openml(
    name=dataset_name,
    version=1,
    as_frame=False,
)
X = dataset.data
y = dataset.target

# Binarize the target: entries equal to the string "3" become 1, all
# other entries become 0.
s = (y == "3")
y = s.astype(int)

# Hold out 25% of the samples; the held-out portion is discarded and only
# the training portion is used in this script.
X_train, _, y_train, _ = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=random_seed,
)

# Number of base estimators used by the Isolation Forest.
n_estimators = 100

# Build the Isolation Forest and fit it on the training portion.
# NOTE(review): y_train is passed to fit(); Isolation Forest is presumably
# unsupervised and ignores it -- confirm against the scikit-learn docs.
model = IsolationForest(
    n_estimators=n_estimators,
    random_state=random_seed,
)
model.fit(X_train, y_train)

# Persist the fitted model to disk.
joblib.dump(model, 'isolation_forest.joblib')