|
|
|
|
|
from sklearn.linear_model import MultiTaskElasticNetCV |
|
from sklearn.multioutput import MultiOutputRegressor |
|
|
|
from sklearn import metrics |
|
|
|
import pandas as pd, numpy as np |
|
import os |
|
import yaml |
|
import sys |
|
from scipy.stats import spearmanr |
|
|
|
|
|
|
|
|
|
# Fit a multi-task elastic net on the training CSVs and print, for every
# target column, the Spearman rank correlation between the test targets and
# the model's predictions.
#
# Command line arguments (all CSV paths, read with the first column as index):
#   1: training features   2: training targets
#   3: test features       4: test targets
if len(sys.argv) < 5:
    # Exit with a usage message instead of a bare IndexError traceback.
    sys.exit(
        f"usage: {sys.argv[0]} X_train.csv y_train.csv X_test.csv y_test.csv"
    )

x_train_path, y_train_path, x_test_path, y_test_path = sys.argv[1:5]

X_train = pd.read_csv(x_train_path, index_col=0)
y_train = pd.read_csv(y_train_path, index_col=0)

# Only columns whose name starts with 'score' are used as regression targets.
score_columns = y_train.columns[y_train.columns.str.startswith('score')]
if score_columns.empty:
    # Name the real cause here rather than passing an empty target frame to fit().
    raise ValueError(
        f"no column starting with 'score' found in {y_train_path}"
    )
y_train = y_train[score_columns]

X_test = pd.read_csv(x_test_path, index_col=0)
y_test = pd.read_csv(y_test_path, index_col=0)
# Select the same targets, in the same order, as used for training so that
# column i of the prediction matrix lines up with y_test.columns[i].
y_test = y_test[score_columns]

# NOTE(review): n_jobs is hard-coded to 64 workers — confirm this suits the
# machines the script runs on.
regr = MultiTaskElasticNetCV(
    l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
    random_state=0,
    n_jobs=64,
    max_iter=10000,
).fit(X_train, y_train)

y_pred = regr.predict(X_test)

# One line of output per target column, in column order.
for i, col in enumerate(y_test.columns):
    res = spearmanr(y_test[col], y_pred[:, i]).correlation
    print(f"Rho={res:.9f}")
|
|