activeeon
/

models-repository

Model card Files Files and versions Community

models-repository / scikit-learn /isolation_forest /train.py

andrewssobral's picture

Added the isolation_forest algorithm

567404a over 1 year ago

history blame contribute delete

940 Bytes

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	import joblib
	import numpy as np

	from sklearn.datasets import fetch_openml
	from sklearn.ensemble import IsolationForest
	from sklearn.model_selection import train_test_split

	# Configuration: a single seed is reused for NumPy's global RNG, the
	# train/test split and the forest; the forest size is fixed up front too.
	random_seed = 0
	n_estimators = 100
	dataset_name = "cardiotocography"
	np.random.seed(random_seed)

	# Fetch version 1 of the OpenML dataset as arrays rather than a DataFrame.
	dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
	X = dataset.data
	y = dataset.target

	# Binarize the target: 1 where the label equals the string "3", 0 elsewhere.
	s = y == "3"
	y = s.astype(int)

	# Hold out 25% of the samples; only the training portion is kept here and
	# the held-out portion is discarded.
	X_train, _, y_train, _ = train_test_split(
		X,
		y,
		test_size=0.25,
		random_state=random_seed,
	)

	# Build the forest and fit it on the training portion.
	# NOTE(review): IsolationForest is documented as unsupervised, so y_train is
	# presumably ignored by fit() -- confirm against the scikit-learn docs.
	model = IsolationForest(random_state=random_seed, n_estimators=n_estimators)
	model.fit(X_train, y_train)

	# Persist the fitted model to a file in the current working directory.
	joblib.dump(model, 'isolation_forest.joblib')