Maria Castellanos committed
Commit · 179f265
1 Parent(s): d1f7806

Create raw csv file along clean

Browse files:
- evaluate.py +46 -4
- requirements.txt +2 -1
- utils.py +1 -0
evaluate.py
CHANGED

@@ -223,7 +223,7 @@ def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_r
     data_df = pd.read_csv(local_path)
     test_df = pd.read_csv(test_path)
     try:
-        results_df = calculate_metrics(data_df, test_df)
+        results_df, results_raw_df = calculate_metrics(data_df, test_df)
         if not isinstance(results_df, pd.DataFrame) or results_df.empty:
             raise gr.Error("Evaluation produced no results.")
     except Exception as e:
@@ -256,6 +256,13 @@ def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_r
     results_df['model_report'] = report
     results_df['anonymous'] = meta.participant.anonymous
     results_df['hf_username'] = username
+
+    results_raw_df['user'] = display_name
+    results_raw_df['submission_time'] = timestamp
+    results_raw_df['model_report'] = report
+    results_raw_df['anonymous'] = meta.participant.anonymous
+    results_raw_df['hf_username'] = username
+
     safe_user = _unsafify_username(username)
     destination_path = f"results/{safe_user}_{timestamp}_results.csv"
     tmp_name = None
@@ -273,7 +280,22 @@ def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_r
     )
     Path(tmp_name).unlink()

-
+    # Same for raw file
+    destination_path_raw = f"results/{safe_user}_{timestamp}_results_raw.csv"
+    tmp_name = None
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:
+        results_raw_df.to_csv(tmp, index=False)
+        tmp.flush()
+        tmp_name = tmp.name
+
+    API.upload_file(
+        path_or_fileobj=tmp_name,
+        path_in_repo=destination_path_raw,
+        repo_id=results_repo,
+        repo_type="dataset",
+        commit_message=f"Add raw result data for {username}"
+    )
+    Path(tmp_name).unlink()

 def calculate_metrics(
     results_dataframe: pd.DataFrame,
@@ -310,6 +332,7 @@ def calculate_metrics(

     final_cols = ["MAE", "RAE", "R2", "Spearman R", "Kendall's Tau"]
     all_endpoint_results = []
+    all_endpoint_results_raw = []

     for ept in ENDPOINTS:
         pred_col = f"{ept}_pred"
@@ -351,10 +374,22 @@ def calculate_metrics(
         df_endpoint = df_reindexed.reset_index()
         all_endpoint_results.append(df_endpoint)

+        # Also save a raw dataframe with all the bootstrapping samples
+        df_endpoint_raw = bootstrap_df.pivot_table(
+            index=["Sample", "Endpoint"],
+            columns="Metric",
+            values="Value"
+        ).reset_index()
+        df_endpoint_raw.columns.name = None
+        df_endpoint_raw['Sample'] = df_endpoint_raw['Sample'].astype(int)
+        all_endpoint_results_raw.append(df_endpoint_raw)
+
     df_results = pd.concat(all_endpoint_results, ignore_index=True)
+    df_results_raw = pd.concat(all_endpoint_results_raw, ignore_index=True)
+
+    # Average results
     mean_cols = [f'mean_{m}' for m in final_cols]
     std_cols = [f'std_{m}' for m in final_cols]
-    # Average results
     macro_means = df_results[mean_cols].mean()
     macro_stds = df_results[std_cols].mean()
     avg_row = {"Endpoint": "Average"}
@@ -363,4 +398,11 @@ def calculate_metrics(
     df_with_average = pd.concat([df_results, pd.DataFrame([avg_row])], ignore_index=True)
     # Fix order of columns
     df_with_average = df_with_average[["Endpoint"]+mean_cols+std_cols]
-
+
+    # Average results for raw dataframe
+    macro_results_by_sample = df_results_raw.groupby('Sample')[final_cols].mean().reset_index()
+    macro_results_by_sample["Endpoint"] = "Average"
+    df_raw_with_average = pd.concat([df_results_raw, macro_results_by_sample], ignore_index=True)
+    df_raw_with_average = df_raw_with_average[["Sample","Endpoint"] + final_cols]
+
+    return df_with_average, df_raw_with_average
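For context on the reshape in the new raw-results code: the `pivot_table` call turns the long-format bootstrap records (one row per sample, endpoint and metric) into one wide row per bootstrap sample, which is what ends up in the `*_results_raw.csv` files. A minimal sketch of that step, assuming `bootstrap_df` carries `Sample`, `Endpoint`, `Metric` and `Value` columns as the call implies, with a hypothetical endpoint name and made-up values:

import pandas as pd

# Long-format bootstrap results: one row per (Sample, Endpoint, Metric).
bootstrap_df = pd.DataFrame({
    "Sample":   [0, 0, 1, 1],
    "Endpoint": ["LogD", "LogD", "LogD", "LogD"],   # hypothetical endpoint
    "Metric":   ["MAE", "R2", "MAE", "R2"],
    "Value":    [0.52, 0.71, 0.55, 0.68],
})

# Same reshape as in calculate_metrics: one row per bootstrap sample,
# one column per metric.
df_endpoint_raw = bootstrap_df.pivot_table(
    index=["Sample", "Endpoint"],
    columns="Metric",
    values="Value"
).reset_index()
df_endpoint_raw.columns.name = None               # drop the leftover "Metric" axis name
df_endpoint_raw["Sample"] = df_endpoint_raw["Sample"].astype(int)

print(df_endpoint_raw)
#    Sample Endpoint   MAE    R2
# 0       0     LogD  0.52  0.71
# 1       1     LogD  0.55  0.68

Note that `pivot_table` aggregates duplicate entries with the mean by default, so with exactly one value per (Sample, Endpoint, Metric) triple the values pass through unchanged.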
requirements.txt
CHANGED

@@ -6,4 +6,5 @@ plotly
 scipy
 scikit-learn
 loguru
-statsmodels
+statsmodels
+tqdm
utils.py
CHANGED

@@ -35,6 +35,7 @@ def fetch_dataset_df():
     feature_schema = Features(metric_features | other_features)

     dset = load_dataset(results_repo_validation, # change to results_repo_test for test set
+                        name='default',
                         split='train',
                         features=feature_schema,
                         download_mode="force_redownload")
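One way to sanity-check the new uploads is to pull a raw file back down from the results dataset repo and look at the per-sample metrics. A minimal sketch using `huggingface_hub.hf_hub_download`; the repo id and filename below are placeholders that only follow the `results/{safe_user}_{timestamp}_results_raw.csv` pattern from the diff:

import pandas as pd
from huggingface_hub import hf_hub_download

# Placeholder repo id and filename -- substitute the actual results_repo
# and an existing raw results file.
local_path = hf_hub_download(
    repo_id="some-org/leaderboard-results",
    filename="results/some_user_20250101-000000_results_raw.csv",
    repo_type="dataset",
)

raw_df = pd.read_csv(local_path)
# One row per bootstrap sample and endpoint, plus the "Average" rows.
print(raw_df.groupby("Endpoint")[["MAE", "RAE", "R2"]].mean())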