| import json |
| import os |
| import argparse |
| import numpy as np |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import pandas as pd |
|
|
def main():
    """Merge partial stability results, plot per-algorithm KDEs, and dump stats.

    Reads every ``partial_result_*.json`` file in ``--output_dir``, merges the
    per-algorithm lists of normalized edit distances, then writes a KDE plot
    (``stability_parallel_fixed.png``) and a JSON summary
    (``final_stats_summary.json``) back into the same directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--output_dir",
        type=str,
        default="analysis_output_parallel",
        help="Directory with partial .json results",
    )
    args = parser.parse_args()

    print(f"📂 Reading results from: {args.output_dir}")

    # Accumulators: one list of normalized edit distances per algorithm.
    final_results = {"Gzip": [], "Tokenizer": [], "AC_M1": []}
    files_found = 0

    if not os.path.exists(args.output_dir):
        print(f"❌ Error: Directory {args.output_dir} does not exist.")
        return

    # Merge every worker's partial result file. sorted() makes the merge order
    # deterministic across platforms (aggregation itself is order-independent).
    for filename in sorted(os.listdir(args.output_dir)):
        if filename.startswith("partial_result_") and filename.endswith(".json"):
            files_found += 1
            file_path = os.path.join(args.output_dir, filename)
            try:
                with open(file_path, "r") as f:
                    data = json.load(f)
                for k in final_results:
                    if k in data:
                        final_results[k].extend(data[k])
            except (OSError, json.JSONDecodeError) as e:
                # Fix: report WHICH file failed (the message previously printed
                # the literal placeholder "(unknown)").
                print(f"⚠️ Error reading {filename}: {e}")

    print(f"✅ Merged data from {files_found} files.")

    plot_records = []
    stats_summary = {}

    for algo, vals in final_results.items():
        if not vals:
            continue

        # Keep only values below 2.0 for plotting so extreme outliers don't
        # flatten the KDE curves.
        cleaned = [v for v in vals if v < 2.0]

        # NOTE: summary stats are computed on the RAW values (outliers
        # included) while the plot uses the cleaned subset — keep this
        # asymmetry in mind when comparing the two outputs.
        stats_summary[algo] = {
            "mean": float(np.mean(vals)),
            "median": float(np.median(vals)),
            "count": len(vals),
        }

        for v in cleaned:
            plot_records.append({"Algorithm": algo, "Normalized Edit Distance": v})

    if not plot_records:
        print("❌ No valid data collected to plot.")
        return

    df = pd.DataFrame(plot_records)
    print(f"📊 Plotting {len(df)} data points...")

    plt.figure(figsize=(12, 7))
    sns.set_style("whitegrid")

    # One density curve per algorithm; common_norm=False normalizes each
    # distribution independently so unequal sample counts stay comparable.
    sns.kdeplot(
        data=df,
        x="Normalized Edit Distance",
        hue="Algorithm",
        fill=True,
        common_norm=False,
        palette="tab10",
        alpha=0.5,
        linewidth=2,
    )

    plt.title("Compression Stability Analysis (Impact of 10% Perturbation)")
    plt.xlabel("Normalized Levenshtein Distance (Lower = More Stable)")
    plt.ylabel("Density")
    plt.xlim(0, 1.2)

    output_img = os.path.join(args.output_dir, "stability_parallel_fixed.png")
    plt.savefig(output_img, dpi=300)
    plt.close()  # release the figure so repeated runs don't accumulate memory
    print(f"🖼️ Plot saved to: {output_img}")

    stats_file = os.path.join(args.output_dir, "final_stats_summary.json")
    with open(stats_file, "w") as f:
        json.dump(stats_summary, f, indent=2)
    print(f"📄 Stats saved to: {stats_file}")

    print("\n=== Summary Stats ===")
    for algo, stat in stats_summary.items():
        print(f"{algo}: Mean={stat['mean']:.4f}, Count={stat['count']}")
|
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()