OpenCaptchaWorld committed on
Commit
d9ef651
·
1 Parent(s): a1c511d

debug updating system

Browse files
Files changed (1) hide show
  1. app.py +36 -3
app.py CHANGED
@@ -900,16 +900,46 @@ def process_uploaded_file(file, model_name=None, provider=None, agent_framework=
900
  except Exception as e:
901
  return None, f"Error processing file: {str(e)}"
902
 
 
 
 
 
 
 
 
 
 
 
 
903
  def aggregate_runs_to_csv():
904
  """
905
  Aggregate all JSON files in runs/ directory into results.csv.
906
  This consolidates all uploaded evaluation results into a single CSV file.
907
  Deduplicates records based on (Model, Provider, Agent Framework) combination,
908
  keeping the most recent entry for each unique combination.
 
909
  """
910
  runs_path = get_runs_path()
911
  results_path = get_results_path()
912
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
913
  # Gather all JSON files with their modification times
914
  records_with_time = []
915
  for path in runs_path.glob("*.json"):
@@ -921,7 +951,10 @@ def aggregate_runs_to_csv():
921
  except Exception as e:
922
  print(f"Warning: Skipping invalid JSON file {path}: {e}")
923
 
924
- if not records_with_time:
 
 
 
925
  # Create empty CSV with headers
926
  fixed_metadata = ["Model", "Provider", "Agent Framework", "Type"]
927
  fixed_metrics = ["Overall Pass Rate", "Avg Duration (s)", "Avg Cost ($)"]
@@ -931,12 +964,12 @@ def aggregate_runs_to_csv():
931
  return
932
 
933
  # Sort by modification time (most recent first)
934
- records_with_time.sort(key=lambda x: x[0], reverse=True)
935
 
936
  # Handle legacy column names and infer Type
937
  legacy_map = {"Notes": "Agent Framework", "Overall": "Overall Pass Rate"}
938
  processed_records = []
939
- for mtime, record in records_with_time:
940
  for old_key, new_key in legacy_map.items():
941
  if old_key in record and new_key not in record:
942
  record[new_key] = record.pop(old_key)
 
900
  except Exception as e:
901
  return None, f"Error processing file: {str(e)}"
902
 
903
def clean_nan_values(record):
    """Convert NaN-like values in a record to None for proper CSV serialization.

    Parameters
    ----------
    record : dict
        A flat mapping of column name -> scalar value (e.g. from
        ``DataFrame.iterrows()`` + ``row.to_dict()``).

    Returns
    -------
    dict
        A new dict with the same keys; any value that pandas considers
        missing (float NaN, None, NaT) is replaced with None so the CSV
        writer emits an empty cell instead of the string "nan".

    Notes
    -----
    ``pd.isna`` already returns True for every float NaN, so the previous
    extra ``isinstance(value, float) and math.isnan(value)`` check was
    dead code and has been removed.
    """
    # Assumes scalar values (pd.isna on a list/array would not be a plain bool).
    return {key: (None if pd.isna(value) else value) for key, value in record.items()}
913
+
914
  def aggregate_runs_to_csv():
915
  """
916
  Aggregate all JSON files in runs/ directory into results.csv.
917
  This consolidates all uploaded evaluation results into a single CSV file.
918
  Deduplicates records based on (Model, Provider, Agent Framework) combination,
919
  keeping the most recent entry for each unique combination.
920
+ Preserves existing records from results.csv that aren't in runs/ directory.
921
  """
922
  runs_path = get_runs_path()
923
  results_path = get_results_path()
924
 
925
+ # First, load existing results.csv to preserve models not in new uploads
926
+ existing_records_with_time = []
927
+ if results_path.exists():
928
+ try:
929
+ df_existing = load_df(results_path)
930
+ if len(df_existing) > 0:
931
+ # Convert existing records to dict format
932
+ for _, row in df_existing.iterrows():
933
+ record = row.to_dict()
934
+ # Clean NaN values
935
+ record = clean_nan_values(record)
936
+ # Use file modification time - 1 day as timestamp (older than new uploads)
937
+ # This ensures new uploads take precedence, but existing records are preserved
938
+ existing_mtime = results_path.stat().st_mtime - 86400 # 1 day ago
939
+ existing_records_with_time.append((existing_mtime, record))
940
+ except Exception as e:
941
+ print(f"Warning: Error loading existing results.csv: {e}")
942
+
943
  # Gather all JSON files with their modification times
944
  records_with_time = []
945
  for path in runs_path.glob("*.json"):
 
951
  except Exception as e:
952
  print(f"Warning: Skipping invalid JSON file {path}: {e}")
953
 
954
+ # Combine existing records with new records from runs/
955
+ all_records_with_time = existing_records_with_time + records_with_time
956
+
957
+ if not all_records_with_time:
958
  # Create empty CSV with headers
959
  fixed_metadata = ["Model", "Provider", "Agent Framework", "Type"]
960
  fixed_metrics = ["Overall Pass Rate", "Avg Duration (s)", "Avg Cost ($)"]
 
964
  return
965
 
966
  # Sort by modification time (most recent first)
967
+ all_records_with_time.sort(key=lambda x: x[0], reverse=True)
968
 
969
  # Handle legacy column names and infer Type
970
  legacy_map = {"Notes": "Agent Framework", "Overall": "Overall Pass Rate"}
971
  processed_records = []
972
+ for mtime, record in all_records_with_time:
973
  for old_key, new_key in legacy_map.items():
974
  if old_key in record and new_key not in record:
975
  record[new_key] = record.pop(old_key)