Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -352,10 +352,12 @@ def calculate_topsis_score(df):
|
|
| 352 |
|
| 353 |
#--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------
|
| 354 |
|
| 355 |
-
def NestedKFoldCrossValidation(
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
| 359 |
|
| 360 |
if 'phenotypes' not in training_data.columns:
|
| 361 |
raise ValueError("Training data does not contain the 'phenotypes' column.")
|
|
@@ -379,7 +381,7 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
|
|
| 379 |
|
| 380 |
# Feature selection
|
| 381 |
if feature_selection:
|
| 382 |
-
rf = RandomForestRegressor(n_estimators=100, random_state=
|
| 383 |
rf.fit(training_genotypic_data_merged, phenotypic_info)
|
| 384 |
selector = SelectFromModel(rf, threshold="mean", prefit=True)
|
| 385 |
training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
|
|
@@ -424,8 +426,7 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
|
|
| 424 |
elif model_name in ['RFModel']:
|
| 425 |
predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
|
| 426 |
else:
|
| 427 |
-
predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy,learning_rate,min_child_weight)
|
| 428 |
-
|
| 429 |
|
| 430 |
# Calculate metrics
|
| 431 |
mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
|
|
@@ -454,8 +455,6 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
|
|
| 454 |
|
| 455 |
# Compile results
|
| 456 |
results_df = pd.DataFrame(results)
|
| 457 |
-
|
| 458 |
-
# Calculate the average metrics for each model
|
| 459 |
avg_results_df = results_df.groupby('Model').agg({
|
| 460 |
'Train_MSE': 'mean',
|
| 461 |
'Train_RMSE': 'mean',
|
|
@@ -500,6 +499,17 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
|
|
| 500 |
|
| 501 |
return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
|
| 502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
#--------------------------------------------------------------------Gradio interface---------------------------------------------------------------
|
| 504 |
|
| 505 |
def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
|
|
|
|
| 352 |
|
| 353 |
#--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------
|
| 354 |
|
| 355 |
+
def NestedKFoldCrossValidation(
|
| 356 |
+
training_data, training_additive, testing_data, testing_additive,
|
| 357 |
+
training_dominance, testing_dominance, epochs, learning_rate, min_child_weight,
|
| 358 |
+
batch_size=64, outer_n_splits=2, output_file='cross_validation_results.csv',
|
| 359 |
+
predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True
|
| 360 |
+
):
|
| 361 |
|
| 362 |
if 'phenotypes' not in training_data.columns:
|
| 363 |
raise ValueError("Training data does not contain the 'phenotypes' column.")
|
|
|
|
| 381 |
|
| 382 |
# Feature selection
|
| 383 |
if feature_selection:
|
| 384 |
+
rf = RandomForestRegressor(n_estimators=100, random_state=65)
|
| 385 |
rf.fit(training_genotypic_data_merged, phenotypic_info)
|
| 386 |
selector = SelectFromModel(rf, threshold="mean", prefit=True)
|
| 387 |
training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
|
|
|
|
| 426 |
elif model_name in ['RFModel']:
|
| 427 |
predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
|
| 428 |
else:
|
| 429 |
+
predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate, min_child_weight)
|
|
|
|
| 430 |
|
| 431 |
# Calculate metrics
|
| 432 |
mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
|
|
|
|
| 455 |
|
| 456 |
# Compile results
|
| 457 |
results_df = pd.DataFrame(results)
|
|
|
|
|
|
|
| 458 |
avg_results_df = results_df.groupby('Model').agg({
|
| 459 |
'Train_MSE': 'mean',
|
| 460 |
'Train_RMSE': 'mean',
|
|
|
|
| 499 |
|
| 500 |
return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
|
| 501 |
|
| 502 |
+
|
| 503 |
+
# Save the results to the file
|
| 504 |
+
#results_df.to_csv(output_file, index=False)
|
| 505 |
+
|
| 506 |
+
# Save predicted phenotypes
|
| 507 |
+
#if all_predicted_phenotypes:
|
| 508 |
+
# predicted_all_df = pd.concat(all_predicted_phenotypes, axis=0, ignore_index=True)
|
| 509 |
+
#predicted_all_df.to_csv(predicted_phenotype_file, index=False)
|
| 510 |
+
|
| 511 |
+
# return results_df, predicted_all_df if all_predicted_phenotypes else None
|
| 512 |
+
|
| 513 |
#--------------------------------------------------------------------Gradio interface---------------------------------------------------------------
|
| 514 |
|
| 515 |
def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
|