mgyigit committed on
Commit
4670ac7
1 Parent(s): 8903ad9

Update src/saving_utils.py

Browse files
Files changed (1) hide show
  1. src/saving_utils.py +54 -13
src/saving_utils.py CHANGED
@@ -1,16 +1,47 @@
1
  import os
2
  import pandas as pd
3
 
4
- script_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the running script
5
 
 
6
 
7
- def save_similarity_output(output_dict, method_name, leaderboard_path="data/leaderboard_results.csv", similarity_path="data/similarity_results.csv"):
8
- leaderboard_path = os.path.join(script_dir, leaderboard_path)
9
- similarity_path = os.path.join(script_dir, similarity_path)
10
 
11
- with open("test_write.txt", "w") as f:
12
- f.write("Write test successful!")
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Load or initialize the DataFrames
15
  if os.path.exists(leaderboard_path):
16
  leaderboard_df = pd.read_csv(leaderboard_path)
@@ -24,7 +55,6 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="data/lead
24
  print("Similarity file not found!")
25
  return -1
26
 
27
- # Ensure the method exists in the similarity DataFrame
28
  if method_name not in similarity_df['Method'].values:
29
  # Create a new row for the method with default values
30
  new_row = {col: None for col in similarity_df.columns}
@@ -74,11 +104,22 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="data/lead
74
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
75
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
76
 
77
- # Save the updated DataFrames back to CSV
78
- similarity_df.to_csv(similarity_path, index=False)
79
- leaderboard_df.to_csv(leaderboard_path, index=False)
80
- print(f"Updated files saved to {similarity_path} and {leaderboard_path}")
81
-
 
 
 
 
 
 
 
 
 
 
 
82
  return 0
83
 
84
  def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):
 
1
  import os
2
  import pandas as pd
3
 
4
+ from huggingface_hub import HfApi
5
 
6
+ script_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the running script
7
 
 
 
 
8
 
9
def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
    """Write ``dataframe`` as a CSV file under ``save_dir`` and return its path.

    Args:
        dataframe: Object exposing ``to_csv`` (a pandas DataFrame at the call
            sites visible in this file).
        file_name: Name of the CSV file. May contain sub-directory components
            relative to ``save_dir``; they are created as needed.
        save_dir: Directory to write into. Created if it does not exist.

    Returns:
        The full path of the file that was written.
    """
    # Construct the full file path
    file_path = os.path.join(save_dir, file_name)

    # Create the parent of the *final* path rather than only save_dir, so a
    # file_name such as "sub/results.csv" (or an empty save_dir) does not make
    # to_csv fail on a missing directory.
    parent_dir = os.path.dirname(file_path) or os.curdir
    os.makedirs(parent_dir, exist_ok=True)

    # Save the DataFrame as a CSV (index column is not written)
    dataframe.to_csv(file_path, index=False)
    print(f"Saved {file_name} to {file_path}")

    return file_path
21
+
22
def upload_to_hub(local_path, remote_path, repo_id, repo_type="dataset"):
    """Upload a single local file to a Hugging Face Hub repository.

    Args:
        local_path: Path of the file on disk to upload.
        remote_path: Destination path inside the repository; its basename is
            used in the commit message.
        repo_id: Identifier of the target repository.
        repo_type: Repository type passed through to the Hub client
            (defaults to ``"dataset"``).
    """
    hub_client = HfApi()  # Requires authentication via HF_TOKEN

    upload_kwargs = {
        "path_or_fileobj": local_path,
        "path_in_repo": remote_path,
        "repo_id": repo_id,
        "repo_type": repo_type,
        "commit_message": f"Updating {os.path.basename(remote_path)}",
    }
    hub_client.upload_file(**upload_kwargs)

    print(f"Uploaded {local_path} to {repo_id}/{remote_path}")
32
+
33
def cleanup_local_file(file_path):
    """Delete ``file_path`` if present; silently do nothing if it is missing.

    Args:
        file_path: Path of the file to remove.

    Raises:
        OSError: For removal failures other than the file not existing
            (for example, when the path is a directory).
    """
    # Attempt the removal directly instead of checking os.path.exists first:
    # the file may disappear between the check and the removal, which would
    # otherwise surface as an unexpected FileNotFoundError.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        return
    print(f"Removed local file: {file_path}")
37
+
38
+ def save_similarity_output(
39
+ output_dict,
40
+ method_name,
41
+ leaderboard_path="/home/user/app/src/data/leaderboard_results.csv",
42
+ similarity_path="/home/user/app/src/data/similarity_results.csv",
43
+ repo_id="mgyigit/probe3",
44
+ ):
45
  # Load or initialize the DataFrames
46
  if os.path.exists(leaderboard_path):
47
  leaderboard_df = pd.read_csv(leaderboard_path)
 
55
  print("Similarity file not found!")
56
  return -1
57
 
 
58
  if method_name not in similarity_df['Method'].values:
59
  # Create a new row for the method with default values
60
  new_row = {col: None for col in similarity_df.columns}
 
104
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
105
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
106
 
107
+ # Save locally to a temporary directory
108
+ leaderboard_file = save_csv_locally(leaderboard_df, "leaderboard_results.csv")
109
+ similarity_file = save_csv_locally(similarity_df, "similarity_results.csv")
110
+
111
+ # Upload to Hugging Face Hub
112
+ try:
113
+ upload_to_hub(leaderboard_file, "leaderboard_results.csv", repo_id)
114
+ upload_to_hub(similarity_file, "similarity_results.csv", repo_id)
115
+ except Exception as e:
116
+ print(f"Failed to upload files: {e}")
117
+ return -1
118
+
119
+ # Clean up local files
120
+ cleanup_local_file(leaderboard_file)
121
+ cleanup_local_file(similarity_file)
122
+
123
  return 0
124
 
125
  def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):