Spaces:

vectara
/

forrest_leaderboard_app_test

Running

App Files Files Community

forrest_leaderboard_app_test / app_utils.py

forrestbao

init push

e02fefc 1 day ago

raw

history blame contribute delete

2.19 kB

	# %%
	import os
	import json
	from huggingface_hub import Repository

	import dotenv
	dotenv.load_dotenv()

	# %%
	def pull_results(
	    results_dir: str
	):
	    """Sync the ``vectara/results`` dataset repository into *results_dir*.

	    A ``Repository`` is created for ``results_dir`` with
	    ``clone_from="vectara/results"`` and ``repo_type="dataset"``, then
	    ``git_pull()`` is called on it. The access token is read from the
	    ``HF_TOKEN`` environment variable (``None`` is passed when it is unset).

	    Args:
	        results_dir: Local directory used as the repository working copy.

	    Returns:
	        None.
	    """
	    hf_token = os.getenv("HF_TOKEN")
	    results_repo = Repository(
	        local_dir=results_dir,
	        clone_from="vectara/results",
	        repo_type="dataset",
	        token=hf_token,
	    )
	    results_repo.git_pull()

	# pull_results()

	# %%
	def extract_info_from_result_file(result_file):
	    """Load one result JSON file and flatten it into a leaderboard row.

	    The file is expected to have this shape::

	        {
	            "config": {
	                "model_dtype": "float16",
	                "model_name": "databricks/dbrx-instruct",
	                "model_sha": "main"
	            },
	            "results": {
	                "hallucination_rate": {
	                    "hallucination_rate": 8.34990059642147
	                },
	                "factual_consistency_rate": {
	                    "factual_consistency_rate": 91.65009940357854
	                },
	                "answer_rate": {
	                    "answer_rate": 100.0
	                },
	                "average_summary_length": {
	                    "average_summary_length": 85.9
	                }
	            }
	        }

	    Args:
	        result_file: Path of the JSON result file to read.

	    Returns:
	        A dict with the keys ``"LLM"``, ``"Hallucination %"``,
	        ``"Answer %"`` and ``"Avg Summary Length"``, copied from the
	        corresponding entries of the file.

	    Raises:
	        KeyError: If any of the entries read below is missing.
	        json.JSONDecodeError: If the file does not contain valid JSON.
	        OSError: If the file cannot be opened.
	    """
	    # Open inside a context manager so the handle is closed deterministically;
	    # the caller may invoke this once per file over a large directory tree.
	    # JSON text is UTF-8, so do not depend on the platform's default encoding.
	    with open(result_file, "r", encoding="utf-8") as result_fp:
	        info = json.load(result_fp)

	    result = {
	        "LLM": info["config"]["model_name"],
	        "Hallucination %": info["results"]["hallucination_rate"]["hallucination_rate"],
	        # "Factual Consistency Rate": info["results"]["factual_consistency_rate"]["factual_consistency_rate"],
	        "Answer %": info["results"]["answer_rate"]["answer_rate"],
	        "Avg Summary Length": info["results"]["average_summary_length"]["average_summary_length"],
	    }
	    return result

	# result_file = "./results/openai/chatgpt-4o-latest/results_2025-04-01 00:34:36.187783.json"
	# print(extract_info_from_result_file(result_file))

	# %%
	def scan_and_extract(dir: str):
	    """Collect leaderboard rows from every ``.json`` file under *dir*.

	    The tree rooted at ``dir`` is walked with ``os.walk``; each file whose
	    name ends with ``".json"`` is passed to
	    ``extract_info_from_result_file`` and the returned values are gathered
	    in walk order.

	    Args:
	        dir: Root directory to scan recursively.

	    Returns:
	        A list with one extracted entry per JSON file found (empty when
	        there are none).
	    """
	    # Lazily enumerate matching paths so walking and extraction interleave.
	    json_paths = (
	        os.path.join(folder, filename)
	        for folder, _subfolders, filenames in os.walk(dir)
	        for filename in filenames
	        if filename.endswith(".json")
	    )
	    return [extract_info_from_result_file(path) for path in json_paths]

	if __name__ == "__main__":
	    # Script entry point: gather every result under ./results and write the
	    # combined list to ./results.json as indented JSON.
	    leaderboard_rows = scan_and_extract("./results")
	    with open("./results.json", "w") as out_file:
	        json.dump(leaderboard_rows, out_file, indent=2)

	# %%