Added the isolation_forest algorithm

Browse files

Files changed (10) hide show

scikit-learn/isolation_forest/client.py +73 -0
scikit-learn/isolation_forest/convert2onnx.sh +1 -0
scikit-learn/isolation_forest/isolation_forest.joblib +3 -0
scikit-learn/isolation_forest/isolation_forest.onnx +3 -0
scikit-learn/isolation_forest/isolation_forest.zip +3 -0
scikit-learn/isolation_forest/isolation_forest/1/model.onnx +3 -0
scikit-learn/isolation_forest/isolation_forest/config.pbtxt +18 -0
scikit-learn/isolation_forest/model_packaging.sh +1 -0
scikit-learn/isolation_forest/predict.py +38 -0
scikit-learn/isolation_forest/train.py +33 -0

scikit-learn/isolation_forest/client.py ADDED Viewed

	@@ -0,0 +1,73 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import sys
+import argparse
+import numpy as np
+import tritonclient.grpc as grpcclient
+from sklearn.datasets import fetch_openml
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+# Fixed seed: applied to NumPy's global RNG here and also passed to
+# train_test_split as random_state inside make_prediction.
+random_seed = 0
+np.random.seed(random_seed)
+def make_prediction(model_server, model_name, model_version, verbose):
+    """Score a Triton-hosted model on the cardiotocography test split.
+
+    Downloads the dataset, sends the held-out 25% to the server in a single
+    gRPC inference request, and prints the accuracy of the returned labels.
+
+    Args:
+        model_server: gRPC address of the Triton server, e.g. "localhost:8001".
+        model_name: Name of the model to query.
+        model_version: Version of the model to query. Forwarded to both the
+            inference request and the statistics request.
+        verbose: Passed to the Triton client to enable verbose output.
+
+    Exits the process with status 1 if the client cannot be created or the
+    statistics response does not contain exactly one model entry.
+    """
+    try:
+        triton_client = grpcclient.InferenceServerClient(url=model_server, verbose=verbose)
+    except Exception as e:
+        print("channel creation failed: " + str(e))
+        sys.exit(1)
+    # The Triton client takes the version as a string; an empty string leaves
+    # the choice of version to the server.
+    model_version = "" if model_version is None else str(model_version)
+    # Load the dataset and build binary labels: 1 where the class is "3", else 0
+    dataset_name = "cardiotocography"
+    dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
+    X, y = dataset.data, dataset.target
+    y = (y == "3").astype(int)
+    # Only the test portion of the split is used by this client
+    _, X_test, _, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed)
+    input_data = X_test.astype(np.float32)
+    input_label = y_test.astype(np.float32)
+    print(f'input_data:\n{input_data[0]}')
+    print(f'input_label:\n{input_label[0]}')
+    # Send the whole test split as one [n_samples, n_features] FP32 tensor
+    infer_input = grpcclient.InferInput('float_input', [input_data.shape[0], input_data.shape[1]], "FP32")
+    infer_input.set_data_from_numpy(input_data)
+    inputs = [infer_input]
+    outputs = [grpcclient.InferRequestedOutput('label')]
+    results = triton_client.infer(
+        model_name=model_name,
+        inputs=inputs,
+        model_version=model_version,
+        outputs=outputs,
+    )
+    # Scope the statistics to the requested version so other loaded versions
+    # of the same model do not trip the single-entry check below.
+    statistics = triton_client.get_inference_statistics(model_name=model_name, model_version=model_version)
+    if len(statistics.model_stats) != 1:
+        print("FAILED: Inference Statistics")
+        sys.exit(1)
+    # Get the output array from the results
+    y_pred = results.as_numpy('label').squeeze()
+    # Mapping 1->0 and -1->1 (any value other than 1 becomes 1)
+    y_pred = np.where(y_pred == 1, 0, 1)
+    print(f'y_pred:\n{y_pred[0]}')
+    # Score the model using accuracy classification score
+    acc = accuracy_score(y_test, y_pred)
+    print(f'Accuracy classification score: {acc}')
+"""
+python client.py --model_server localhost:8001 --model_name isolation_forest --model_version 1
+"""
+if __name__ == "__main__":
+    # Command-line entry point: every option has a default, so the script can
+    # be run without arguments against a local server.
+    parser = argparse.ArgumentParser(description="Make predictions using a specific model.")
+    valued_options = (
+        ("--model_server", "localhost:8001", "The address of the model server."),
+        ("--model_name", "isolation_forest", "The name of the model to use."),
+        ("--model_version", "1", "The version of the model to use."),
+    )
+    for flag, default_value, help_text in valued_options:
+        parser.add_argument(flag, default=default_value, help=help_text)
+    # store_true flags are optional and default to False on their own
+    parser.add_argument("--verbose", action="store_true", help='Enable verbose output')
+    cli = parser.parse_args()
+    make_prediction(cli.model_server, cli.model_name, cli.model_version, cli.verbose)

scikit-learn/isolation_forest/convert2onnx.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python ../convert2onnx.py cardiotocography isolation_forest.joblib isolation_forest.onnx  # presumably: dataset name, input .joblib, output .onnx - confirm against ../convert2onnx.py

scikit-learn/isolation_forest/isolation_forest.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc72c4a0184c0563c06f03cdda1c522983deabf3cc68afd44a59e47adbdeccfd
+size 840211

scikit-learn/isolation_forest/isolation_forest.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
+size 786921

scikit-learn/isolation_forest/isolation_forest.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5919365e113c62203098f01b2e1e701dd4bae5b210a423d5a5a0158649749700
+size 125285

scikit-learn/isolation_forest/isolation_forest/1/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
+size 786921

scikit-learn/isolation_forest/isolation_forest/config.pbtxt ADDED Viewed

	@@ -0,0 +1,18 @@

+# Triton model configuration for the "isolation_forest" model (ONNX Runtime backend).
+name: "isolation_forest"
+backend: "onnxruntime"
+# NOTE(review): with max_batch_size 0, dims is normally expected to describe the
+# full tensor shape, yet client.py sends a 2-D [n_samples, n_features] tensor.
+# Confirm the model loads and serves with this setting.
+max_batch_size: 0
+input [
+  {
+    # Same tensor name that client.py uses when building its InferInput.
+    name: "float_input"
+    data_type: TYPE_FP32
+    # Presumably the number of features per sample - TODO confirm against the dataset.
+    dims: [ 35 ]
+  }
+]
+# No outputs are declared here although client.py requests 'label';
+# presumably Triton derives them from the ONNX model - TODO confirm.
+output [
+]
+instance_group [
+  {
+    # A single model instance, placed on CPU.
+    count: 1
+    kind: KIND_CPU
+  }
+]

scikit-learn/isolation_forest/model_packaging.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python ../model_packaging.py .  # passes the current directory as the only argument

scikit-learn/isolation_forest/predict.py ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import joblib
+import numpy as np
+from sklearn.datasets import fetch_openml
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+# Restore the previously trained estimator from the working directory.
+# NOTE: joblib files are pickle-based; only load artifacts you produced yourself.
+model = joblib.load('isolation_forest.joblib')
+# Seed NumPy's global RNG; the same value seeds the split below
+random_seed = 0
+np.random.seed(random_seed)
+# Fetch the data and derive binary labels: 1 where the class is "3", else 0
+dataset_name = "cardiotocography"
+dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
+X = dataset.data
+y = (dataset.target == "3").astype(int)
+# Hold out 25% of the rows; only the test portion is needed for scoring
+split_parts = train_test_split(X, y, test_size=0.25, random_state=random_seed)
+X_test = split_parts[1]
+y_test = split_parts[3]
+print(f'X_test:\n{X_test[0]}')
+print(f'y_test:\n{y_test[0]}')
+# Predict on the held-out rows, then remap: 1 -> 0, anything else (-1) -> 1
+raw_pred = model.predict(X_test)
+y_pred = np.where(raw_pred == 1, 0, 1)
+print(f'y_pred:\n{y_pred[0]}')
+# Report plain classification accuracy against the binary labels
+acc = accuracy_score(y_test, y_pred)
+print(f'Accuracy classification score: {acc}')

scikit-learn/isolation_forest/train.py ADDED Viewed

	@@ -0,0 +1,33 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import joblib
+import numpy as np
+from sklearn.datasets import fetch_openml
+from sklearn.ensemble import IsolationForest
+from sklearn.model_selection import train_test_split
+# Seed NumPy's global RNG; the same value seeds the split and the estimator
+random_seed = 0
+np.random.seed(random_seed)
+# Load the dataset (features only: the model is trained without labels)
+dataset_name = "cardiotocography"
+dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
+X = dataset.data
+# Keep 75% of the rows for training. Without stratification the shuffle depends
+# only on the number of rows and random_state, so this selects the same training
+# rows as a split of (X, y) made with the same parameters.
+X_train, _ = train_test_split(X, test_size=0.25, random_state=random_seed)
+# Define the number of estimators for Isolation Forest algorithm
+n_estimators = 100
+# Create and train the Isolation Forest model. The estimator is unsupervised:
+# fit() ignores y, so no labels are passed.
+model = IsolationForest(n_estimators=n_estimators, random_state=random_seed)
+model.fit(X_train)
+# Save the trained model to a file
+joblib.dump(model, 'isolation_forest.joblib')