import os
import json
import re

import pandas as pd

# Paths to score files
Noise_Robustness_DIR = "results/Noise Robustness/"
Negative_Rejection_DIR = "results/Negative Rejection/"
Counterfactual_Robustness_DIR = "results/Counterfactual Robustness/"
Information_Integration_DIR = "results/Information Integration/"
# Read and aggregate score data shared by several benchmarks
def load_scores_common(file_dir, config):
    """Collect per-model accuracy scores for each noise rate found in file_dir."""
    if not os.path.exists(file_dir):
        return pd.DataFrame(columns=["Model", "0.2", "0.4", "0.6", "0.8", "1.0"])

    score_data = {}
    # Fixed noise rates used as table columns
    fixed_noise_rates = ["0.2", "0.4", "0.6", "0.8", "1.0"]

    # Iterate over each model in config['models']
    for model in config["models"]:
        # Regex pattern to match score files for the current model
        pattern = re.compile(
            rf"^scores_{re.escape(model)}_noise_(?P<noise_rate>[\d.]+)_"
            rf"passage_{re.escape(str(config['passage_num']))}_num_queries_{re.escape(str(config['num_queries']))}\.json$"
        )
        model_scores = {rate: "N/A" for rate in fixed_noise_rates}  # Initialize all as "N/A"

        # Search for matching files in the directory
        for filename in os.listdir(file_dir):
            match = pattern.match(filename)
            if match:
                noise_rate = match.group("noise_rate")  # Extract noise rate from filename
                if noise_rate in fixed_noise_rates:  # Only consider predefined noise rates
                    filepath = os.path.join(file_dir, filename)
                    with open(filepath, "r") as f:
                        score = json.load(f)
                    accuracy = score.get("accuracy")
                    if accuracy is not None:  # Guard against files missing an "accuracy" field
                        model_scores[noise_rate] = f"{accuracy * 100:.2f}"  # Convert to percentage

        score_data[model] = model_scores

    # Convert to DataFrame, one row per model
    df = pd.DataFrame(
        [{"Model": model, **score_data[model]} for model in config["models"]]
    )
    return df
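
# For reference, a minimal sketch of the inputs the loaders expect. Field
# names are taken from the reads in this module; the concrete values and the
# model name below are hypothetical:
#
#   filename: scores_model-a_noise_0.6_passage_5_num_queries_100.json
#   content:  {"accuracy": 0.8125}   -> rendered as "81.25"
#             {"reject_rate": 0.4}   -> rendered as "40.00"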
# Load Negative Rejection scores (evaluated only at noise rate 1.0)
def load_negative_rejection_scores(config):
    if not os.path.exists(Negative_Rejection_DIR):
        return pd.DataFrame(columns=["Model", "Rejection Rate %"])

    score_data = {}
    # Iterate over each model in config['models']
    for model in config["models"]:
        # Expected filename for each model
        expected_filename = f"scores_{model}_noise_1.0_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Negative_Rejection_DIR, expected_filename)

        if os.path.exists(filepath):
            with open(filepath, "r") as f:
                score = json.load(f)
            reject_rate = score.get("reject_rate")
            score_data[model] = f"{reject_rate * 100:.2f}" if reject_rate is not None else "N/A"
        else:
            score_data[model] = "N/A"

    # Convert to DataFrame
    df = pd.DataFrame(
        [{"Model": model, "Rejection Rate %": score_data[model]} for model in config["models"]]
    )
    return df
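
# The counterfactual score files have a different shape: a "conditions" list
# whose entries carry a "condition_label" plus rate fields. A minimal sketch,
# inferred from the reads below (real files may carry additional fields):
#
#   {"conditions": [
#       {"condition_label": "factual_only",   "all_rate": 0.90},
#       {"condition_label": "counterfactual", "all_rate": 0.55,
#        "reject_rate": 0.30, "correct_rate": 0.125}
#   ]}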
def load_counterfactual_robustness_scores(config):
    """Load and format counterfactual robustness scores into a table."""
    noise_rate = 0.4  # Counterfactual scores are only generated at this noise rate
    if not os.path.exists(Counterfactual_Robustness_DIR):
        return pd.DataFrame(columns=["Model", "Accuracy (%)", "Acc_doc (%)", "Error Detection Rate (%)", "Correction Rate (%)"])

    score_data = {}
    for model in config["models"]:
        expected_filename = f"scores_{model}_noise_{noise_rate}_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Counterfactual_Robustness_DIR, expected_filename)

        if os.path.exists(filepath):
            with open(filepath, "r") as f:
                scores_json = json.load(f)  # Read the full JSON content
            # Pick out the factual-only and counterfactual conditions
            factual_score = next((s for s in scores_json["conditions"] if s["condition_label"] == "factual_only"), {})
            counterfactual_score = next((s for s in scores_json["conditions"] if s["condition_label"] == "counterfactual"), {})
            score_data[model] = {
                "Accuracy (%)": f"{factual_score.get('all_rate', 0) * 100:.0f}" if factual_score else "N/A",
                "Acc_doc (%)": f"{counterfactual_score.get('all_rate', 0) * 100:.0f}" if counterfactual_score else "N/A",
                "Error Detection Rate (%)": f"{counterfactual_score.get('reject_rate', 0) * 100:.0f}" if counterfactual_score else "N/A",
                "Correction Rate (%)": f"{counterfactual_score.get('correct_rate', 0) * 100:.2f}" if counterfactual_score else "N/A",
            }
        else:
            score_data[model] = {
                "Accuracy (%)": "N/A",
                "Acc_doc (%)": "N/A",
                "Error Detection Rate (%)": "N/A",
                "Correction Rate (%)": "N/A",
            }

    # Convert to DataFrame; values were already formatted as strings above
    df = pd.DataFrame(
        [{"Model": model, **score_data[model]} for model in config["models"]]
    )
    return df
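
# --- Usage sketch (hypothetical) ---
# A minimal driver showing how these loaders are typically wired together.
# The model names below are placeholders; only the config keys ("models",
# "passage_num", "num_queries") are assumed by the loaders above.
if __name__ == "__main__":
    demo_config = {
        "models": ["model-a", "model-b"],  # placeholder model names
        "passage_num": 5,
        "num_queries": 100,
    }
    print(load_scores_common(Noise_Robustness_DIR, demo_config))
    print(load_scores_common(Information_Integration_DIR, demo_config))
    print(load_negative_rejection_scores(demo_config))
    print(load_counterfactual_robustness_scores(demo_config))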