|
|
|
import os |
|
import json |
|
from huggingface_hub import Repository |
|
|
|
import dotenv |
|
dotenv.load_dotenv() |
|
|
|
|
|
def pull_results(results_dir: str):
    """Sync the ``vectara/results`` dataset repository into ``results_dir``.

    A ``huggingface_hub.Repository`` is created for ``results_dir`` with
    ``vectara/results`` as its clone source, then ``git pull`` is run on it.

    Args:
        results_dir: Local directory that holds (or will hold) the checkout.

    Returns:
        None.
    """
    # The token comes from the environment; it is None when HF_TOKEN is unset.
    hf_token = os.getenv("HF_TOKEN")
    results_repo = Repository(
        local_dir=results_dir,
        clone_from="vectara/results",
        repo_type="dataset",
        token=hf_token,
    )
    results_repo.git_pull()
|
|
|
|
|
|
|
|
|
def extract_info_from_result_file(result_file):
    """Read one result JSON file and flatten it into a single summary row.

    Expected file layout (example)::

        {
            "config": {
                "model_dtype": "float16",
                "model_name": "databricks/dbrx-instruct",
                "model_sha": "main"
            },
            "results": {
                "hallucination_rate": {
                    "hallucination_rate": 8.34990059642147
                },
                "factual_consistency_rate": {
                    "factual_consistency_rate": 91.65009940357854
                },
                "answer_rate": {
                    "answer_rate": 100.0
                },
                "average_summary_length": {
                    "average_summary_length": 85.9
                }
            }
        }

    Args:
        result_file: Path of the JSON result file to read.

    Returns:
        A dict with the keys ``"LLM"``, ``"Hallucination %"``, ``"Answer %"``
        and ``"Avg Summary Length"``. ``factual_consistency_rate`` is not
        included in the returned row.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If one of the expected keys is missing.
    """
    # Use a context manager so the handle is closed deterministically, and
    # read as UTF-8 so parsing does not depend on the platform default encoding.
    with open(result_file, "r", encoding="utf-8") as f:
        info = json.load(f)

    return {
        "LLM": info["config"]["model_name"],
        "Hallucination %": info["results"]["hallucination_rate"]["hallucination_rate"],
        "Answer %": info["results"]["answer_rate"]["answer_rate"],
        "Avg Summary Length": info["results"]["average_summary_length"]["average_summary_length"],
    }
|
|
|
|
|
|
|
|
|
|
|
def scan_and_extract(dir: str):
    """Walk ``dir`` recursively and extract a summary row from every JSON file.

    Every file whose name ends with ``.json`` is passed to
    `extract_info_from_result_file`. Directories and files are visited in
    sorted order so the returned list is the same on every machine and run,
    instead of following filesystem listing order.

    Args:
        dir: Root directory to scan.

    Returns:
        A list with one extracted row per JSON file found, in sorted
        traversal order.
    """
    results = []
    for root, dirs, files in os.walk(dir):
        # Sorting in place steers os.walk (top-down) to descend in a stable order.
        dirs.sort()
        for file in sorted(files):
            if file.endswith(".json"):
                results.append(extract_info_from_result_file(os.path.join(root, file)))
    return results
|
|
|
if __name__ == "__main__":
    # Gather one row per result file under ./results and write them all
    # to ./results.json as an indented JSON array.
    extracted_rows = scan_and_extract("./results")
    serialized = json.dumps(extracted_rows, indent=2)
    with open("./results.json", "w") as out_file:
        out_file.write(serialized)
|
|
|
|
|
|
|
|
|
|
|
|