|
|
|
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
from sklearn.metrics import classification_report |
|
from sklearn import metrics |
|
|
|
import pandas as pd, numpy as np |
|
import os |
|
import yaml |
|
import sys |
|
|
|
|
|
|
|
|
|
X_train = pd.read_csv(f'{sys.argv[1]}', index_col=0) |
|
y_train = pd.read_csv(f'{sys.argv[2]}', index_col=0) |
|
y_train = y_train['score'] |
|
y_train = y_train.astype(int) |
|
X_test = pd.read_csv(f'{sys.argv[3]}', index_col=0) |
|
y_test = pd.read_csv(f'{sys.argv[4]}', index_col=0) |
|
y_test = y_test['score'] |
|
y_test = y_test.astype(int) |
|
|
|
gp_classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train) |
|
|
|
|
|
fpr, tpr, thresholds = metrics.roc_curve(y_test.astype(float), |
|
gp_classifier.predict_proba(X_test)[:,1], |
|
pos_label=1) |
|
print("AUC={:.9f}".format(metrics.auc(fpr, tpr))) |
|
|
|
|
|
|