# Scraped page header (not part of the program): forrestbao's picture
# take theme out
# 3954866
from ast import arguments
from typing import Literal, List
import json
import pandas as pd
import dotenv
dotenv.load_dotenv() # load HF_TOKEN
from funix import funix, import_theme
from vectara_theme import vectara_theme
import_theme(vectara_theme)
from app_utils import pull_results, scan_and_extract
# Load the pre-dumped results first so there is always data to display, even
# if refreshing from the latest results fails below.
# A context manager is used so the file handle is closed deterministically.
with open("./results.json", "r", encoding="utf-8") as results_file:
    results = json.load(results_file)
results_df = pd.DataFrame(results)

# Pull the latest results and re-extract them. This is deliberately
# best-effort: on any failure the error is reported and the pre-dumped data
# loaded above stays in `results` / `results_df`.
try:
    pull_results("./results")
    results = scan_and_extract("./results")
    results_df = pd.DataFrame(results)
except Exception as e:
    print(f"Failed to pull and/or extract latest results: {e}")
@funix(
    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
    direction="column",
    autorun=True,
    theme="vectara"
)
def leaderboard(
    filter_models_by_name: str = ""
) -> pd.DataFrame:
    """# Hughes Hallucination Evaluation Model (HHEM) Leaderboard

    Using [Vectara](https://vectara.com/)'s proprietary [HHEM](https://www.vectara.com/blog/hhem-2-1-a-better-hallucination-detection-model), this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document. For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates. HHEM's open source version is available [here](https://huggingface.co/vectara/hallucination_evaluation_model). For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).

    **Work in progress**: For Internal Use Only.

    ## Usage

    * All LLMs are displayed by default. To filter, enter the names of the models that you want to see in the "Filter Models by Name" field below, separated by commas or semicolons.
    * Results are paginated. To page thru, use the `<` or `>` buttons at the bottom right corner of the table.
    * To sort the table, hover over a column header and click the arrow. The arrow automatically points up and down depending on the sort order.
    * Click the "Refresh" button to refresh the leaderboard if the table is not shown or does not update when you change the filter.

    Args:
        filter_models_by_name: filter models by name using comma-separated strings
    """
    # NOTE(review): the docstring above is Markdown addressed to end users and
    # is presumably rendered by funix as the page description -- its wording is
    # kept verbatim; confirm before rewording it.

    # Work on a private copy: `results_df` is module-level state shared by
    # every call, and the column assignment below would otherwise write into it.
    df = results_df.copy()

    # Round hallucination % to 3 decimal places.
    df["Hallucination %"] = df["Hallucination %"].apply(lambda pct: round(pct, 3))

    # Accept both "," and ";" as separators. All spaces are removed, so names
    # are compared without any whitespace; empty fragments are discarded.
    normalized = filter_models_by_name.replace(",", ";").replace(" ", "")
    names = [name for name in normalized.split(";") if name]

    # A filter made only of separators/spaces yields no names and is treated
    # as "no filter".
    if names:
        llm_names = df["LLM"]
        keep = pd.Series(False, index=df.index)
        for name in names:
            # Literal (non-regex) substring match: user input such as "c++"
            # or "(" must not be interpreted as a regular expression.
            # Rows whose "LLM" value is missing never match (na=False).
            keep = keep | llm_names.str.contains(name, regex=False, na=False)
        df = df[keep]

    # Lowest hallucination rate first.
    return df.sort_values(by="Hallucination %", ascending=True)