andrewssobral
committed on
Commit
•
567404a
1
Parent(s):
478d418
Added the isolation_forest algorithm
Browse files- scikit-learn/isolation_forest/client.py +73 -0
- scikit-learn/isolation_forest/convert2onnx.sh +1 -0
- scikit-learn/isolation_forest/isolation_forest.joblib +3 -0
- scikit-learn/isolation_forest/isolation_forest.onnx +3 -0
- scikit-learn/isolation_forest/isolation_forest.zip +3 -0
- scikit-learn/isolation_forest/isolation_forest/1/model.onnx +3 -0
- scikit-learn/isolation_forest/isolation_forest/config.pbtxt +18 -0
- scikit-learn/isolation_forest/model_packaging.sh +1 -0
- scikit-learn/isolation_forest/predict.py +38 -0
- scikit-learn/isolation_forest/train.py +33 -0
scikit-learn/isolation_forest/client.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
import sys
|
5 |
+
import argparse
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
import tritonclient.grpc as grpcclient
|
9 |
+
|
10 |
+
from sklearn.datasets import fetch_openml
|
11 |
+
from sklearn.model_selection import train_test_split
|
12 |
+
from sklearn.metrics import accuracy_score
|
13 |
+
|
14 |
+
# Set the random seed
|
15 |
+
random_seed = 0
|
16 |
+
np.random.seed(random_seed)
|
17 |
+
|
18 |
+
|
19 |
+
def make_prediction(model_server, model_name, model_version, verbose):
    """Run the cardiotocography test split through a Triton-served model.

    Opens a gRPC client to ``model_server``, sends the whole test split as a
    single FP32 request to ``model_name`` / ``model_version``, maps the
    returned labels to 0/1 and prints the accuracy against the ground truth.

    Args:
        model_server: ``host:port`` of the Triton gRPC endpoint.
        model_name: Name of the model to query.
        model_version: Version of the model to query. ``None`` or an empty
            string lets the server choose the version.
        verbose: Forwarded to the Triton client to enable verbose output.

    Exits the process with status 1 if the client cannot be created or if
    the inference statistics do not contain exactly one entry.
    """
    try:
        triton_client = grpcclient.InferenceServerClient(url=model_server, verbose=verbose)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # The Triton client expects the version as a string; "" means
    # "let the server pick according to its version policy".
    version = "" if model_version is None else str(model_version)

    # Load the dataset and build the binary ground truth
    # (1 where the target equals "3", otherwise 0).
    dataset_name = "cardiotocography"
    dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
    X, y = dataset.data, dataset.target
    s = y == "3"
    y = s.astype(int)

    # Split the dataset into training and testing sets; only the test part is used.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.25, random_state=random_seed)
    input_data = X_test.astype(np.float32)
    input_label = y_test.astype(np.float32)
    print(f'input_data:\n{input_data[0]}')
    print(f'input_label:\n{input_label[0]}')

    # Describe the request: one 2-D FP32 input, one requested output.
    infer_input = grpcclient.InferInput('float_input', list(input_data.shape), "FP32")
    infer_input.set_data_from_numpy(input_data)
    inputs = [infer_input]
    outputs = [grpcclient.InferRequestedOutput('label')]

    # Query the requested model version (previously the argument was ignored).
    results = triton_client.infer(
        model_name=model_name,
        inputs=inputs,
        model_version=version,
        outputs=outputs,
    )

    # Restrict the statistics to the same version so the single-entry check
    # below is not tripped by other loaded versions of the model.
    statistics = triton_client.get_inference_statistics(
        model_name=model_name,
        model_version=version,
    )
    if len(statistics.model_stats) != 1:
        print("FAILED: Inference Statistics")
        sys.exit(1)

    # Flatten to 1-D; unlike squeeze() this keeps a single-row result indexable.
    y_pred = results.as_numpy('label').reshape(-1)
    # Mapping 1->0 and -1->1
    y_pred = np.where(y_pred == 1, 0, 1)
    print(f'y_pred:\n{y_pred[0]}')

    # Score the model using accuracy classification score
    acc = accuracy_score(y_test, y_pred)
    print(f'Accuracy classification score: {acc}')
|
61 |
+
|
62 |
+
|
63 |
+
"""
|
64 |
+
python client.py --model_server localhost:8001 --model_name isolation_forest --model_version 1
|
65 |
+
"""
|
66 |
+
if __name__ == "__main__":
    # Command-line entry point: collect connection/model options and run the check.
    cli = argparse.ArgumentParser(description="Make predictions using a specific model.")
    # (flag, default, help) for every plain string-valued option.
    string_options = (
        ("--model_server", "localhost:8001", "The address of the model server."),
        ("--model_name", "isolation_forest", "The name of the model to use."),
        ("--model_version", "1", "The version of the model to use."),
    )
    for flag, default_value, help_text in string_options:
        cli.add_argument(flag, default=default_value, help=help_text)
    # store_true already implies an optional flag that defaults to False.
    cli.add_argument("--verbose", action="store_true", help='Enable verbose output')
    options = cli.parse_args()
    make_prediction(
        options.model_server,
        options.model_name,
        options.model_version,
        options.verbose,
    )
|
scikit-learn/isolation_forest/convert2onnx.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python ../convert2onnx.py cardiotocography isolation_forest.joblib isolation_forest.onnx
|
scikit-learn/isolation_forest/isolation_forest.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc72c4a0184c0563c06f03cdda1c522983deabf3cc68afd44a59e47adbdeccfd
|
3 |
+
size 840211
|
scikit-learn/isolation_forest/isolation_forest.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
|
3 |
+
size 786921
|
scikit-learn/isolation_forest/isolation_forest.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5919365e113c62203098f01b2e1e701dd4bae5b210a423d5a5a0158649749700
|
3 |
+
size 125285
|
scikit-learn/isolation_forest/isolation_forest/1/model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4a4afb76f6e06146afdd53850b8f3a769a36247762e39d241daa0d4aff6a37
|
3 |
+
size 786921
|
scikit-learn/isolation_forest/isolation_forest/config.pbtxt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "isolation_forest"
|
2 |
+
backend: "onnxruntime"
|
3 |
+
max_batch_size: 0
|
4 |
+
input [
|
5 |
+
{
|
6 |
+
name: "float_input"
|
7 |
+
data_type: TYPE_FP32
|
8 |
+
dims: [ 35 ]
|
9 |
+
}
|
10 |
+
]
|
11 |
+
output [
|
12 |
+
]
|
13 |
+
instance_group [
|
14 |
+
{
|
15 |
+
count: 1
|
16 |
+
kind: KIND_CPU
|
17 |
+
}
|
18 |
+
]
|
scikit-learn/isolation_forest/model_packaging.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python ../model_packaging.py .
|
scikit-learn/isolation_forest/predict.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
import joblib
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
from sklearn.datasets import fetch_openml
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
+
from sklearn.metrics import accuracy_score
|
10 |
+
|
11 |
+
# Restore the serialized estimator from disk
model = joblib.load('isolation_forest.joblib')

# Seed NumPy's global RNG
random_seed = 0
np.random.seed(random_seed)

# Fetch the dataset from OpenML as plain arrays
dataset_name = "cardiotocography"
dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
X = dataset.data
# Binary ground truth: 1 where the target equals "3", otherwise 0
s = dataset.target == "3"
y = s.astype(int)

# Same split parameters as used elsewhere in this example; keep only the test part
split_parts = train_test_split(X, y, test_size=0.25, random_state=random_seed)
X_test = split_parts[1]
y_test = split_parts[3]
print(f'X_test:\n{X_test[0]}')
print(f'y_test:\n{y_test[0]}')

# Predict on the held-out data, then map 1 -> 0 and anything else (-1) -> 1
raw_pred = model.predict(X_test)
y_pred = np.where(raw_pred != 1, 1, 0)
print(f'y_pred:\n{y_pred[0]}')

# Report the accuracy classification score
acc = accuracy_score(y_test, y_pred)
print(f'Accuracy classification score: {acc}')
|
scikit-learn/isolation_forest/train.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
import joblib
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
from sklearn.datasets import fetch_openml
|
8 |
+
from sklearn.ensemble import IsolationForest
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
|
11 |
+
# Seed NumPy's global RNG
random_seed = 0
np.random.seed(random_seed)

# Fetch the dataset from OpenML as plain arrays
dataset_name = "cardiotocography"
dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
X = dataset.data
# Binary labels: 1 where the target equals "3", otherwise 0
s = dataset.target == "3"
y = s.astype(int)

# Hold out 25% of the rows; only the training part is used here
split_parts = train_test_split(X, y, test_size=0.25, random_state=random_seed)
X_train = split_parts[0]
y_train = split_parts[2]

# Number of trees in the Isolation Forest ensemble
n_estimators = 100

# Build the Isolation Forest and fit it in one step (fit returns the estimator)
model = IsolationForest(
    n_estimators=n_estimators,
    random_state=random_seed,
).fit(X_train, y_train)

# Persist the fitted model to a file
joblib.dump(model, 'isolation_forest.joblib')
|