|
import argparse |
|
|
|
import numpy as np |
|
import onnxruntime |
|
import pandas as pd |
|
|
|
from marcai.utils import load_config |
|
|
|
|
|
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` to ``x``.

    Args:
        x: A scalar or NumPy array; the function is applied element-wise
            through ``np.exp``.

    Returns:
        The logistic transform of ``x``, with the same shape as the input.
    """
    exp_of_negated = np.exp(-1 * x)
    denominator = 1 + exp_of_negated
    return 1 / denominator
|
|
|
|
|
# Inference sessions keyed by the model path they were created from. Building
# a session loads and initialises the whole model, and callers such as
# ``main`` invoke ``predict_onnx`` once per data chunk, so the session is
# created on first use and reused afterwards.
_SESSION_CACHE = {}


def predict_onnx(model_onnx_path, data):
    """Run the ONNX model on ``data`` and return sigmoid-activated outputs.

    The inference session for ``model_onnx_path`` is created on the first
    call and reused on later calls with the same path within this process.
    NOTE: a model file that is replaced on disk while the process is running
    is therefore not reloaded.

    Args:
        model_onnx_path: Model location handed to
            ``onnxruntime.InferenceSession``; also used as the cache key.
        data: Object exposing ``to_numpy(dtype=...)`` (``main`` passes a
            pandas DataFrame of feature columns). It is converted to a
            float32 array and fed to the model's first input.

    Returns:
        A NumPy array built from the list of all model outputs (one leading
        axis entry per output), with ``sigmoid`` applied element-wise.
    """
    ort_session = _SESSION_CACHE.get(model_onnx_path)
    if ort_session is None:
        ort_session = onnxruntime.InferenceSession(model_onnx_path)
        _SESSION_CACHE[model_onnx_path] = ort_session

    x = data.to_numpy(dtype=np.float32)

    # Only the first declared model input is fed.
    input_name = ort_session.get_inputs()[0].name
    ort_inputs = {input_name: x}

    # Passing None as the output list requests every model output.
    ort_outs = np.array(ort_session.run(None, ort_inputs))
    return sigmoid(ort_outs)
|
|
|
def args_parser():
    """Build the command-line parser for the prediction script.

    Returns:
        An ``argparse.ArgumentParser`` with the required options
        ``-i/--input``, ``-o/--output`` and ``-m/--model-dir``, plus the
        optional integer ``--chunksize`` (default 1024).
    """
    # (flags, keyword settings) pairs, registered in this order.
    option_specs = (
        (
            ("-i", "--input"),
            {"help": "Path to preprocessed data file", "required": True},
        ),
        (
            ("-o", "--output"),
            {"help": "Output path", "required": True},
        ),
        (
            ("-m", "--model-dir"),
            {
                "help": "Directory containing model ONNX and YAML files",
                "required": True,
            },
        ),
        (
            ("--chunksize",),
            {
                "help": "Chunk size for reading and predicting",
                "default": 1024,
                "type": int,
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli
|
|
|
|
|
def main(args):
    """Predict over the input CSV chunk by chunk and write the results.

    Reads ``config.yaml`` and ``model.onnx`` from ``args.model_dir``, streams
    ``args.input`` in chunks of ``args.chunksize`` rows, adds a
    ``prediction`` column to each chunk and writes it to ``args.output``.

    Args:
        args: Parsed options providing ``model_dir``, ``input``, ``output``
            and ``chunksize`` (see ``args_parser``).
    """
    model_dir = args.model_dir
    config = load_config(f"{model_dir}/config.yaml")
    onnx_file = f"{model_dir}/model.onnx"

    reader = pd.read_csv(args.input, chunksize=args.chunksize)

    for chunk_number, chunk in enumerate(reader):
        # Keep only the columns the model was configured to consume.
        features = chunk[config["model"]["features"]]
        scores = predict_onnx(onnx_file, features)
        chunk["prediction"] = scores.squeeze()

        # The first chunk creates the file with a header row; every later
        # chunk is appended without one.
        is_first = chunk_number == 0
        chunk.to_csv(
            args.output,
            mode="w" if is_first else "a",
            header=is_first,
            index=False,
        )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the prediction.
    parser = args_parser()
    args = parser.parse_args()
    main(args)
|
|