File size: 2,190 Bytes
e02fefc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# %%
import os 
import json
from huggingface_hub import Repository

import dotenv
dotenv.load_dotenv()

# %%
def pull_results(
    results_dir: str
):
    """Clone the vectara/results HF dataset repo into *results_dir* (if absent) and pull the latest changes.

    Authenticates with the ``HF_TOKEN`` environment variable (loaded from .env above).
    """
    hf_token = os.getenv("HF_TOKEN")
    local_repo = Repository(
        local_dir=results_dir,
        clone_from="vectara/results",
        repo_type="dataset",
        token=hf_token,
    )
    local_repo.git_pull()

# pull_results()

# %%
def extract_info_from_result_file(result_file):
    """Load one evaluation-result JSON file and flatten it into a leaderboard row.

    Args:
        result_file: Path to a result JSON file with the shape shown below.

    Returns:
        dict with keys "LLM", "Hallucination %", "Answer %", "Avg Summary Length".

    Raises:
        KeyError: if the file is missing any of the expected nested keys.
        json.JSONDecodeError: if the file is not valid JSON.

    Expected input format::

        {
        "config": {
            "model_dtype": "float16",
            "model_name": "databricks/dbrx-instruct",
            "model_sha": "main"
        },
        "results": {
            "hallucination_rate": {
            "hallucination_rate": 8.34990059642147
            },
            "factual_consistency_rate": {
            "factual_consistency_rate": 91.65009940357854
            },
            "answer_rate": {
            "answer_rate": 100.0
            },
            "average_summary_length": {
            "average_summary_length": 85.9
            }
        }
        }
    """
    # Use a context manager so the file handle is closed promptly
    # (the original json.load(open(...)) leaked the handle).
    with open(result_file, "r", encoding="utf-8") as f:
        info = json.load(f)

    result = {
        "LLM": info["config"]["model_name"],
        "Hallucination %": info["results"]["hallucination_rate"]["hallucination_rate"],
        # "Factual Consistency Rate": info["results"]["factual_consistency_rate"]["factual_consistency_rate"],
        "Answer %": info["results"]["answer_rate"]["answer_rate"],
        "Avg Summary Length": info["results"]["average_summary_length"]["average_summary_length"],
    }
    return result

# result_file = "./results/openai/chatgpt-4o-latest/results_2025-04-01 00:34:36.187783.json"
# print(extract_info_from_result_file(result_file))

# %%
def scan_and_extract(dir: str):
    """Recursively walk *dir*, parse every ``.json`` file found, and return the extracted rows.

    Each file is passed through ``extract_info_from_result_file``; the
    resulting dicts are collected into a single list.
    """
    # NOTE: the parameter name shadows the builtin `dir`; kept for
    # backward compatibility with any keyword callers.
    return [
        extract_info_from_result_file(os.path.join(root, fname))
        for root, _subdirs, filenames in os.walk(dir)
        for fname in filenames
        if fname.endswith(".json")
    ]

if __name__ == "__main__":
    # Aggregate every result file under ./results into one summary JSON.
    summary_rows = scan_and_extract("./results")
    with open("./results.json", "w") as out_file:
        json.dump(summary_rows, out_file, indent=2)

# %%