Spaces:

vectara
/

forrest_leaderboard_app_test

Running

App Files Files Community

forrest_leaderboard_app_test / app.py

forrestbao

take theme out

3954866 about 6 hours ago

raw

history blame contribute delete

3.03 kB

	from ast import arguments
	from typing import Literal, List
	import json

	import pandas as pd

	import dotenv
	dotenv.load_dotenv() # load HF_TOKEN

	from funix import funix, import_theme
	from vectara_theme import vectara_theme
	import_theme(vectara_theme)

	from app_utils import pull_results, scan_and_extract

	# Load the pre-dumped results first so that `results` / `results_df` are
	# always defined, even if pulling the latest results fails below.
	# A context manager guarantees the file handle is closed, and the encoding
	# is pinned so the load does not depend on the platform default.
	with open("./results.json", "r", encoding="utf-8") as results_file:
	    results = json.load(results_file)
	results_df = pd.DataFrame(results)

	# Best-effort refresh: pull the latest results and re-extract them.
	# Any failure is reported and the pre-dumped data loaded above stays in use,
	# so a failed refresh never prevents the app from starting.
	try:
	    pull_results("./results")
	    results = scan_and_extract("./results")
	    results_df = pd.DataFrame(results)
	except Exception as e:
	    print(f"Failed to pull and/or extract latest results: {e}")

	@funix(
	    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
	    direction="column",
	    autorun=True,
	    theme="vectara"
	)
	def leaderboard(
	    filter_models_by_name: str = ""
	) -> pd.DataFrame:
	    """# Hughes Hallucination Evaluation Model (HHEM) Leaderboard

	    Using [Vectara](https://vectara.com/)'s proprietary [HHEM](https://www.vectara.com/blog/hhem-2-1-a-better-hallucination-detection-model), this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document. For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates. HHEM's open source version is available [here](https://huggingface.co/vectara/hallucination_evaluation_model). For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).

	    Work in progress: For Internal Use Only.

	    ## Usage

	    * All LLMs are displayed by default. To filter, enter the names of the models that you want to see in the "Filter Models by Name" field below, separated by commas or semicolons.
	    * Results are paginated. To page thru, use the `<` or `>` buttons at the bottom right corner of the table.
	    * To sort the table, hover over a column header and click the arrow. The arrow automatically points up and down depending on the sort order.
	    * Click the "Refresh" button to refresh the leaderboard if the table is not shown or does not update when you change the filter.



	    Args:
	        filter_models_by_name: filter models by name using comma-separated strings
	    """
	    # NOTE(review): the docstring above is kept verbatim on purpose; it
	    # presumably doubles as the page text rendered by funix -- confirm before
	    # rewording it.

	    # Local import keeps this block self-contained without touching the
	    # order-sensitive import/setup section at the top of the file.
	    import re

	    # Work on a copy so the rounding below never writes into the shared
	    # module-level `results_df`.
	    df = results_df.copy()

	    # Round hallucination % to 3 decimal places.
	    df["Hallucination %"] = df["Hallucination %"].apply(lambda x: round(x, 3))

	    # Accept both "," and ";" as separators. Whitespace is stripped around
	    # each name (not inside it) and empty entries are dropped, so inputs such
	    # as "a, b;" or ",," behave sensibly.
	    raw_names = (filter_models_by_name or "").replace(",", ";").split(";")
	    wanted_names = [name.strip() for name in raw_names if name.strip()]

	    if wanted_names:
	        # Match rows whose "LLM" value contains ANY of the requested names.
	        # Names are escaped so user input is treated as literal text rather
	        # than as a regular expression, and joined with a real alternation
	        # "|" (an escaped "\|" would only match a literal pipe character).
	        pattern = "|".join(re.escape(name) for name in wanted_names)
	        # na=False: rows with a missing "LLM" value never match.
	        df = df[df["LLM"].str.contains(pattern, na=False)]

	    # Lowest hallucination rate first.
	    return df.sort_values(by="Hallucination %", ascending=True)