# %%
import json
import os

import dotenv
from huggingface_hub import Repository

# Load variables from a local .env file (if one exists) into the process
# environment; pull_results() below reads HF_TOKEN via os.getenv.
dotenv.load_dotenv()


# %%
def pull_results(results_dir: str) -> None:
    """Pull the ``vectara/results`` dataset repository into *results_dir*.

    A ``huggingface_hub.Repository`` is created for *results_dir* with
    ``clone_from="vectara/results"`` and ``repo_type="dataset"``, using the
    ``HF_TOKEN`` environment variable as the access token (``None`` when the
    variable is unset), and then ``git_pull()`` is called on it.

    Args:
        results_dir: Local directory backing the repository checkout.
    """
    repo = Repository(
        local_dir=results_dir,
        clone_from="vectara/results",
        repo_type="dataset",
        token=os.getenv("HF_TOKEN"),
    )
    repo.git_pull()


# pull_results("./results")


# %%
def extract_info_from_result_file(result_file) -> dict:
    """Read one result JSON file and return its leaderboard fields.

    The file is expected to look like::

        {
            "config": {
                "model_dtype": "float16",
                "model_name": "databricks/dbrx-instruct",
                "model_sha": "main"
            },
            "results": {
                "hallucination_rate": {
                    "hallucination_rate": 8.34990059642147
                },
                "factual_consistency_rate": {
                    "factual_consistency_rate": 91.65009940357854
                },
                "answer_rate": {
                    "answer_rate": 100.0
                },
                "average_summary_length": {
                    "average_summary_length": 85.9
                }
            }
        }

    Args:
        result_file: Path of the JSON file to read.

    Returns:
        A dict with the keys ``"LLM"``, ``"Hallucination %"``,
        ``"Answer %"`` and ``"Avg Summary Length"``.

    Raises:
        KeyError: If any of the expected keys is missing from the file.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Use a context manager so the handle is closed right away; this function
    # is called once per file while walking a whole directory tree.
    with open(result_file, "r", encoding="utf-8") as f:
        info = json.load(f)

    metrics = info["results"]
    result = {
        "LLM": info["config"]["model_name"],
        "Hallucination %": metrics["hallucination_rate"]["hallucination_rate"],
        # Factual consistency rate is intentionally not exported:
        # "Factual Consistency Rate": metrics["factual_consistency_rate"]["factual_consistency_rate"],
        "Answer %": metrics["answer_rate"]["answer_rate"],
        "Avg Summary Length": metrics["average_summary_length"]["average_summary_length"],
    }
    return result


# result_file = "./results/openai/chatgpt-4o-latest/results_2025-04-01 00:34:36.187783.json"
# print(extract_info_from_result_file(result_file))


# %%
def scan_and_extract(dir: str) -> list:
    """Collect the extracted info of every ``.json`` file under *dir*.

    The directory tree is walked recursively with ``os.walk`` and
    ``extract_info_from_result_file`` is called on each file whose name ends
    with ``".json"``.

    Args:
        dir: Root directory to scan.

    Returns:
        A list with one result dict per JSON file, in ``os.walk`` order.
    """
    results = []
    for root, _dirs, files in os.walk(dir):
        for file in files:
            if file.endswith(".json"):
                results.append(
                    extract_info_from_result_file(os.path.join(root, file))
                )
    return results


if __name__ == "__main__":
    # Aggregate every result file under ./results into a single JSON list.
    results = scan_and_extract("./results")
    with open("./results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

# %%