AshmithaIRRI committed on
Commit
778b5fc
·
verified ·
1 Parent(s): 5b1fe9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -352,10 +352,12 @@ def calculate_topsis_score(df):
352
 
353
  #--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------
354
 
355
- def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
356
- training_dominance, testing_dominance, epochs,learning_rate,min_child_weight, batch_size=64,
357
- outer_n_splits=2, inner_n_splits=2, output_file='cross_validation_results.csv',
358
- predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True):
 
 
359
 
360
  if 'phenotypes' not in training_data.columns:
361
  raise ValueError("Training data does not contain the 'phenotypes' column.")
@@ -379,7 +381,7 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
379
 
380
  # Feature selection
381
  if feature_selection:
382
- rf = RandomForestRegressor(n_estimators=100, random_state=42)
383
  rf.fit(training_genotypic_data_merged, phenotypic_info)
384
  selector = SelectFromModel(rf, threshold="mean", prefit=True)
385
  training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
@@ -424,8 +426,7 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
424
  elif model_name in ['RFModel']:
425
  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
426
  else:
427
- predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy,learning_rate,min_child_weight)
428
-
429
 
430
  # Calculate metrics
431
  mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
@@ -454,8 +455,6 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
454
 
455
  # Compile results
456
  results_df = pd.DataFrame(results)
457
-
458
- # Calculate the average metrics for each model
459
  avg_results_df = results_df.groupby('Model').agg({
460
  'Train_MSE': 'mean',
461
  'Train_RMSE': 'mean',
@@ -500,6 +499,17 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
500
 
501
  return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
502
 
 
 
 
 
 
 
 
 
 
 
 
503
  #--------------------------------------------------------------------Gradio interface---------------------------------------------------------------
504
 
505
  def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
 
352
 
353
  #--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------
354
 
355
+ def NestedKFoldCrossValidation(
356
+ training_data, training_additive, testing_data, testing_additive,
357
+ training_dominance, testing_dominance, epochs, learning_rate, min_child_weight,
358
+ batch_size=64, outer_n_splits=2, output_file='cross_validation_results.csv',
359
+ predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True
360
+ ):
361
 
362
  if 'phenotypes' not in training_data.columns:
363
  raise ValueError("Training data does not contain the 'phenotypes' column.")
 
381
 
382
  # Feature selection
383
  if feature_selection:
384
+ rf = RandomForestRegressor(n_estimators=100, random_state=65)
385
  rf.fit(training_genotypic_data_merged, phenotypic_info)
386
  selector = SelectFromModel(rf, threshold="mean", prefit=True)
387
  training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
 
426
  elif model_name in ['RFModel']:
427
  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
428
  else:
429
+ predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate, min_child_weight)
 
430
 
431
  # Calculate metrics
432
  mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
 
455
 
456
  # Compile results
457
  results_df = pd.DataFrame(results)
 
 
458
  avg_results_df = results_df.groupby('Model').agg({
459
  'Train_MSE': 'mean',
460
  'Train_RMSE': 'mean',
 
499
 
500
  return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
501
 
502
+
503
+ # Save the results to the file
504
+ #results_df.to_csv(output_file, index=False)
505
+
506
+ # Save predicted phenotypes
507
+ #if all_predicted_phenotypes:
508
+ # predicted_all_df = pd.concat(all_predicted_phenotypes, axis=0, ignore_index=True)
509
+ #predicted_all_df.to_csv(predicted_phenotype_file, index=False)
510
+
511
+ # return results_df, predicted_all_df if all_predicted_phenotypes else None
512
+
513
  #--------------------------------------------------------------------Gradio interface---------------------------------------------------------------
514
 
515
  def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,