|
|
|
import os
|
|
import json
|
|
import re
|
|
import pandas as pd
|
|
|
|
|
|
# Directory paths for each benchmark evaluation category's score files.
# NOTE(review): names use Capitalized_Snake_Case rather than the conventional
# UPPER_SNAKE_CASE, and "Infomration" is a typo — both are kept as-is because
# other modules may reference these exact names.
Noise_Robustness_DIR = "results/Noise Robustness/"


Negative_Rejection_DIR = "results/Negative Rejection/"


Counterfactual_Robustness_DIR = "results/Counterfactual Robustness/"


Infomration_Integration_DIR = "results/Information Integration/"
|
|
|
|
|
|
|
|
def load_scores_common(file_dir, config):
    """Load per-noise-rate accuracy scores for each configured model.

    Scans ``file_dir`` for JSON score files named
    ``scores_<model>_noise_<rate>_passage_<passage_num>_num_queries_<num_queries>.json``
    and builds one row per model, reporting the accuracy (as a two-decimal
    percentage string) at each fixed noise rate. Rates with no matching
    file — or files without a numeric "accuracy" value — show "N/A".

    Args:
        file_dir: Directory containing the score JSON files.
        config: Dict with keys "models" (list of model names),
            "passage_num" and "num_queries" (interpolated into filenames).

    Returns:
        pd.DataFrame with columns ["Model", "0.2", "0.4", "0.6", "0.8", "1.0"].
    """
    fixed_noise_rates = ["0.2", "0.4", "0.6", "0.8", "1.0"]

    if not os.path.exists(file_dir):
        # Preserve the column layout even when there is nothing to load.
        return pd.DataFrame(columns=["Model"] + fixed_noise_rates)

    score_data = {}

    for model in config["models"]:
        pattern = re.compile(
            rf"^scores_{re.escape(model)}_noise_(?P<noise_rate>[\d.]+)_"
            rf"passage_{re.escape(str(config['passage_num']))}_num_queries_{re.escape(str(config['num_queries']))}\.json$"
        )

        # Default every rate to "N/A"; overwritten when a score file is found.
        model_scores = {rate: "N/A" for rate in fixed_noise_rates}

        for filename in os.listdir(file_dir):
            match = pattern.match(filename)
            if not match:
                continue
            noise_rate = match.group("noise_rate")
            if noise_rate not in fixed_noise_rates:
                continue
            filepath = os.path.join(file_dir, filename)
            with open(filepath, "r") as f:
                score = json.load(f)
            accuracy = score.get("accuracy")
            # Guard against files missing "accuracy": the original defaulted
            # to the string "N/A", and f"{'N/A' * 100:.2f}" raises ValueError.
            if isinstance(accuracy, (int, float)):
                model_scores[noise_rate] = f"{accuracy * 100:.2f}"

        score_data[model] = model_scores

    return pd.DataFrame(
        [{"Model": model, **score_data[model]} for model in config["models"]]
    )
|
|
|
|
|
|
def load_negative_rejection_scores(config):
    """Load negative-rejection rates for each configured model.

    Looks in ``Negative_Rejection_DIR`` for one score file per model at a
    fixed noise rate of 1.0 and reports its "reject_rate" as a percentage
    string; missing files or missing keys yield "N/A".

    Args:
        config: Dict with keys "models", "passage_num", "num_queries".

    Returns:
        pd.DataFrame with columns ["Model", "Rejection Rate %"].
    """
    # Single existence check. The original duplicated this check: the first
    # copy returned a columnless pd.DataFrame() and made the intended,
    # column-preserving return unreachable dead code.
    if not os.path.exists(Negative_Rejection_DIR):
        return pd.DataFrame(columns=["Model", "Rejection Rate %"])

    score_data = {}

    for model in config["models"]:
        expected_filename = f"scores_{model}_noise_1.0_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Negative_Rejection_DIR, expected_filename)

        if not os.path.exists(filepath):
            score_data[model] = "N/A"
            continue

        with open(filepath, "r") as f:
            score = json.load(f)
        reject_rate = score.get("reject_rate", "N/A")
        score_data[model] = f"{reject_rate * 100}" if reject_rate != "N/A" else "N/A"

    return pd.DataFrame(
        [
            {"Model": model, "Rejection Rate %": score_data[model]}
            for model in config["models"]
        ]
    )
|
|
|
|
def load_counterfactual_robustness_scores(config):
    """Load and format counterfactual robustness scores into a table.

    Reads one score file per model (at a fixed noise rate of 0.4) from
    ``Counterfactual_Robustness_DIR``. Each file is expected to hold a
    "conditions" list of per-condition dicts; the "factual_only" and
    "counterfactual" conditions supply accuracy, document accuracy,
    error-detection rate and correction rate, reported as percentages.

    Args:
        config: Dict with keys "models", "passage_num", "num_queries".
            The dict is not modified (the original wrote
            ``config['noise_rate'] = 0.4`` as a side effect on the caller).

    Returns:
        pd.DataFrame with columns ["Model", "Accuracy (%)", "Acc_doc (%)",
        "Error Detection Rate (%)", "Correction Rate (%)"].
    """
    # Fixed evaluation noise rate; kept local instead of mutating config.
    noise_rate = 0.4

    columns = ["Model", "Accuracy (%)", "Acc_doc (%)",
               "Error Detection Rate (%)", "Correction Rate (%)"]

    if not os.path.exists(Counterfactual_Robustness_DIR):
        return pd.DataFrame(columns=columns)

    # Placeholder row used when a model's score file is absent.
    na_row = {
        "Accuracy (%)": "N/A",
        "Acc_doc (%)": "N/A",
        "Error Detection Rate (%)": "N/A",
        "Correction Rate (%)": "N/A",
    }

    score_data = {}

    for model in config["models"]:
        expected_filename = f"scores_{model}_noise_{noise_rate}_passage_{config['passage_num']}_num_queries_{config['num_queries']}.json"
        filepath = os.path.join(Counterfactual_Robustness_DIR, expected_filename)

        if not os.path.exists(filepath):
            score_data[model] = dict(na_row)
            continue

        with open(filepath, "r") as f:
            scores_json = json.load(f)

        # Pick the two conditions of interest; {} (falsy) when absent.
        factual_score = next(
            (s for s in scores_json["conditions"] if s["condition_label"] == "factual_only"), {}
        )
        counterfactual_score = next(
            (s for s in scores_json["conditions"] if s["condition_label"] == "counterfactual"), {}
        )

        score_data[model] = {
            "Accuracy (%)": int(round(factual_score.get("all_rate", 0) * 100)) if factual_score else "N/A",
            "Acc_doc (%)": int(round(counterfactual_score.get("all_rate", 0) * 100)) if counterfactual_score else "N/A",
            "Error Detection Rate (%)": int(round(counterfactual_score.get("reject_rate", 0) * 100)) if counterfactual_score else "N/A",
            "Correction Rate (%)": round(counterfactual_score.get("correct_rate", 0) * 100, 2) if counterfactual_score else "N/A",
        }

    # Render every cell as a string; correction rate keeps two decimals.
    df = pd.DataFrame([
        {
            "Model": model,
            "Accuracy (%)": f"{score_data[model]['Accuracy (%)']}" if score_data[model]["Accuracy (%)"] != "N/A" else "N/A",
            "Acc_doc (%)": f"{score_data[model]['Acc_doc (%)']}" if score_data[model]["Acc_doc (%)"] != "N/A" else "N/A",
            "Error Detection Rate (%)": f"{score_data[model]['Error Detection Rate (%)']}" if score_data[model]["Error Detection Rate (%)"] != "N/A" else "N/A",
            "Correction Rate (%)": f"{score_data[model]['Correction Rate (%)']:.2f}" if score_data[model]["Correction Rate (%)"] != "N/A" else "N/A",
        }
        for model in config["models"]
    ])

    return df
|
|
|