andrewssobral committed on
Commit
567404a
1 Parent(s): 478d418

Added the isolation_forest algorithm

Browse files
scikit-learn/isolation_forest/client.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import sys
5
+ import argparse
6
+ import numpy as np
7
+
8
+ import tritonclient.grpc as grpcclient
9
+
10
+ from sklearn.datasets import fetch_openml
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.metrics import accuracy_score
13
+
14
# Fixed seed: it is also used as `random_state` for the train/test split below,
# so the client evaluates on the same held-out rows on every run.
random_seed = 0
np.random.seed(random_seed)


def make_prediction(model_server, model_name, model_version, verbose):
    """Score a Triton-served model on the cardiotocography test split.

    Fetches the OpenML "cardiotocography" dataset, relabels class "3" as 1
    and everything else as 0, sends the 25% test split to the model in a
    single gRPC inference request, and prints the accuracy.

    Args:
        model_server: Address of the Triton gRPC endpoint, e.g. "localhost:8001".
        model_name: Name of the model to query.
        model_version: Version of the model to query. It is forwarded to both
            the inference and the statistics requests; ``None`` or an empty
            string lets the server choose the version.
        verbose: Passed through to the Triton client's ``verbose`` flag.

    Exits the process with status 1 when the client cannot be created or when
    the statistics response does not contain exactly one model entry.
    """
    try:
        triton_client = grpcclient.InferenceServerClient(url=model_server, verbose=verbose)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # The Triton client expects the version as a string ("" = server's choice).
    version = "" if model_version is None else str(model_version)

    # Load the dataset and build binary labels: 1 for class "3", 0 otherwise.
    dataset = fetch_openml(name="cardiotocography", version=1, as_frame=False)
    X = dataset.data
    y = (dataset.target == "3").astype(int)

    # Keep only the held-out part of the split.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed)
    input_data = X_test.astype(np.float32)
    input_label = y_test.astype(np.float32)
    print(f'input_data:\n{input_data[0]}')
    print(f'input_label:\n{input_label[0]}')

    # Describe the request: one FP32 input holding the whole test split and
    # the single output tensor we want back.
    infer_input = grpcclient.InferInput('float_input', list(input_data.shape), "FP32")
    infer_input.set_data_from_numpy(input_data)
    inputs = [infer_input]
    outputs = [grpcclient.InferRequestedOutput('label')]

    # Query the requested version explicitly; previously the argument was
    # accepted but silently ignored.
    results = triton_client.infer(
        model_name=model_name,
        model_version=version,
        inputs=inputs,
        outputs=outputs,
    )

    # Restricting the statistics to the same version keeps the sanity check
    # below meaningful when several versions of the model are loaded.
    statistics = triton_client.get_inference_statistics(
        model_name=model_name,
        model_version=version,
    )
    if len(statistics.model_stats) != 1:
        print("FAILED: Inference Statistics")
        sys.exit(1)

    # Flatten to 1-D; reshape(-1) (unlike squeeze) keeps a single-row result indexable.
    y_pred = results.as_numpy('label').reshape(-1)
    # Mapping 1->0 and -1->1
    y_pred = np.where(y_pred == 1, 0, 1)
    print(f'y_pred:\n{y_pred[0]}')

    # Score the model using accuracy classification score
    acc = accuracy_score(y_test, y_pred)
    print(f'Accuracy classification score: {acc}')
61
+
62
+
63
+ """
64
+ python client.py --model_server localhost:8001 --model_name isolation_forest --model_version 1
65
+ """
66
if __name__ == "__main__":
    # Command-line entry point: collect the connection/model options and run
    # a single prediction pass against the server.
    cli = argparse.ArgumentParser(description="Make predictions using a specific model.")

    # (flag, default, help) for every plain string option.
    string_options = (
        ("--model_server", "localhost:8001", "The address of the model server."),
        ("--model_name", "isolation_forest", "The name of the model to use."),
        ("--model_version", "1", "The version of the model to use."),
    )
    for flag, default_value, help_text in string_options:
        cli.add_argument(flag, default=default_value, help=help_text)

    # `store_true` already implies an optional flag that defaults to False.
    cli.add_argument("--verbose", action="store_true", help='Enable verbose output')

    opts = cli.parse_args()
    make_prediction(opts.model_server, opts.model_name, opts.model_version, opts.verbose)
scikit-learn/isolation_forest/convert2onnx.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python ../convert2onnx.py cardiotocography isolation_forest.joblib isolation_forest.onnx
scikit-learn/isolation_forest/isolation_forest.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc72c4a0184c0563c06f03cdda1c522983deabf3cc68afd44a59e47adbdeccfd
3
+ size 840211
scikit-learn/isolation_forest/isolation_forest.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
3
+ size 786921
scikit-learn/isolation_forest/isolation_forest.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5919365e113c62203098f01b2e1e701dd4bae5b210a423d5a5a0158649749700
3
+ size 125285
scikit-learn/isolation_forest/isolation_forest/1/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
3
+ size 786921
scikit-learn/isolation_forest/isolation_forest/config.pbtxt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "isolation_forest"
2
+ backend: "onnxruntime"
3
+ max_batch_size: 0
4
+ input [
5
+ {
6
+ name: "float_input"
7
+ data_type: TYPE_FP32
8
+ dims: [ 35 ]
9
+ }
10
+ ]
11
+ output [
12
+ ]
13
+ instance_group [
14
+ {
15
+ count: 1
16
+ kind: KIND_CPU
17
+ }
18
+ ]
scikit-learn/isolation_forest/model_packaging.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python ../model_packaging.py .
scikit-learn/isolation_forest/predict.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import joblib
5
+ import numpy as np
6
+
7
+ from sklearn.datasets import fetch_openml
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.metrics import accuracy_score
10
+
11
# Restore the previously fitted estimator from disk.
model = joblib.load('isolation_forest.joblib')

# Fixed seed; it is also the `random_state` of the split below, so the same
# test rows are selected on every run.
random_seed = 0
np.random.seed(random_seed)

# Fetch the dataset and build binary labels: 1 for class "3", 0 otherwise.
dataset = fetch_openml(name="cardiotocography", version=1, as_frame=False)
X = dataset.data
y = (dataset.target == "3").astype(int)

# Split the data and keep only the held-out 25%.
split = train_test_split(X, y, test_size=0.25, random_state=random_seed)
X_test, y_test = split[1], split[3]
print(f'X_test:\n{X_test[0]}')
print(f'y_test:\n{y_test[0]}')

# Predict on the test rows, then remap the model's labels so that
# 1 becomes 0 and anything else (-1) becomes 1, matching `y_test`.
raw_pred = model.predict(X_test)
y_pred = (raw_pred != 1).astype(int)
print(f'y_pred:\n{y_pred[0]}')

# Report the accuracy classification score.
acc = accuracy_score(y_test, y_pred)
print(f'Accuracy classification score: {acc}')
scikit-learn/isolation_forest/train.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import joblib
5
+ import numpy as np
6
+
7
+ from sklearn.datasets import fetch_openml
8
+ from sklearn.ensemble import IsolationForest
9
+ from sklearn.model_selection import train_test_split
10
+
11
# Fixed seed; reused as `random_state` for both the split and the model so
# training is reproducible.
random_seed = 0
np.random.seed(random_seed)

# Fetch the dataset and build binary labels: 1 for class "3", 0 otherwise.
dataset = fetch_openml(name="cardiotocography", version=1, as_frame=False)
X = dataset.data
y = (dataset.target == "3").astype(int)

# Split the data and keep only the 75% training portion.
split = train_test_split(X, y, test_size=0.25, random_state=random_seed)
X_train, y_train = split[0], split[2]

# Number of trees in the Isolation Forest ensemble.
n_estimators = 100

# Build and fit the model in one step (`fit` returns the estimator itself).
# NOTE(review): per the scikit-learn docs IsolationForest ignores `y`; it is
# still passed here exactly as before.
model = IsolationForest(n_estimators=n_estimators, random_state=random_seed).fit(X_train, y_train)

# Persist the trained model to a file.
joblib.dump(model, 'isolation_forest.joblib')