fev-bench

Running

fev-bench / app.py

Oleksandr Shchur

Add results for TimesFM-2.0

54941f4 9 months ago

4.97 kB

	import fev
	import gradio as gr
	import pandas as pd

	from src import about
	from src.custom_html_js import custom_css
	from src.formatting import make_clickable_model

	# Download the seasonal_naive results CSV and parse it into a pandas DataFrame.
	# NOTE(review): `df` is not referenced again in the visible part of this file;
	# confirm whether this download at import time is still needed.
	seasonal_naive_results_url = (
	    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv"
	)
	df = pd.read_csv(seasonal_naive_results_url)


	# All per-model result files live in the same directory of the fev repository,
	# so the URL list is assembled from that common prefix and the file stems.
	# The order of the stems is the order of the resulting URLs.
	_results_root = "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results"
	_result_stems = (
	    "auto_arima",
	    "auto_ets",
	    "auto_theta",
	    "chronos_base",
	    "chronos_large",
	    "chronos_mini",
	    "chronos_small",
	    "chronos_tiny",
	    "chronos_bolt_base",
	    "chronos_bolt_mini",
	    "chronos_bolt_small",
	    "chronos_bolt_tiny",
	    "moirai_base",
	    "moirai_large",
	    "moirai_small",
	    "seasonal_naive",
	    "timesfm",
	    "timesfm-2.0",
	)
	summary_urls = [f"{_results_root}/{stem}.csv" for stem in _result_stems]

	# Leaderboard columns to keep, mapped from their raw names to the headers
	# shown in the UI. The key order is also the order of `selected_cols`.
	rename_cols = dict(
	    gmean_relative_error="Average relative error",
	    avg_rank="Average rank",
	    median_inference_time_s="Median inference time (s)",
	    training_corpus_overlap="Training corpus overlap (%)",
	)
	# Raw column names, in display order.
	selected_cols = [*rename_cols]


	def highlight_zeroshot(styler):
	    """Colour the "Training corpus overlap (%)" column of a styled table.

	    Cells whose value equals 0 are rendered in bold green; every other cell
	    in that column is rendered in black. Other columns are left untouched.

	    Args:
	        styler: Object exposing ``map(func, subset=...)``, e.g. a pandas Styler.

	    Returns:
	        Whatever ``styler.map`` returns.
	    """
	    overlap_column = "Training corpus overlap (%)"

	    def _overlap_css(cell):
	        # CSS declaration applied to a single cell of the overlap column.
	        return "color: green; font-weight: bold" if cell == 0 else "color: black"

	    return styler.map(_overlap_css, subset=[overlap_column])


	def _build_leaderboard(metric):
	    """Return the styled leaderboard table for one metric column.

	    The frame returned by ``fev.leaderboard`` is reduced to ``selected_cols``
	    and relabelled with the headers from ``rename_cols``. The model names are
	    passed through ``make_clickable_model`` and the overlap column is
	    coloured by ``highlight_zeroshot``.
	    """
	    lb = fev.leaderboard(summary_urls, metric_column=metric)
	    lb = lb[selected_cols].rename(columns=rename_cols)

	    # Formats are collected before reset_index() so that the "model_name"
	    # column added by it gets no number format.
	    overlap_column = "Training corpus overlap (%)"
	    format_dict = {
	        col: "{:.1%}" if col == overlap_column else "{:.3f}"
	        for col in lb.columns
	    }

	    lb = lb.reset_index()
	    lb["model_name"] = lb["model_name"].apply(make_clickable_model)
	    return highlight_zeroshot(lb.style.format(format_dict))


	# One styled leaderboard per evaluation metric, keyed by the metric name.
	leaderboards = {metric: _build_leaderboard(metric) for metric in ("WQL", "MASE")}


	# Column types for the leaderboard tables: the "model_name" column (produced
	# by make_clickable_model) is rendered as markdown, followed by one numeric
	# column per entry of `selected_cols`. Deriving the list keeps it in step
	# with the number of displayed columns.
	leaderboard_datatypes = ["markdown"] + ["number"] * len(selected_cols)

	# (key into `leaderboards`, tab label, description) for each metric tab,
	# in display order.
	metric_tabs = [
	    ("WQL", "📊 Probabilistic forecast (WQL)", """Forecast accuracy measured by Weighted Quantile Loss."""),
	    ("MASE", "📈 Point forecast (MASE)", """Forecast accuracy measured by Mean Absolute Scaled Error."""),
	]

	with gr.Blocks(css=custom_css) as demo:
	    gr.HTML(about.TITLE)
	    gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")

	    with gr.Tabs(elem_classes="tab-buttons"):
	        with gr.Tab("🏅 Chronos Benchmark II", id=0):
	            with gr.Column():
	                gr.Markdown(about.CHRONOS_BENCHMARK, elem_classes="markdown-text")
	                with gr.Tabs():
	                    # Both metric tabs share the same layout: a short
	                    # description above a read-only leaderboard table.
	                    for metric_name, tab_label, description in metric_tabs:
	                        with gr.Tab(tab_label):
	                            gr.Markdown(description)
	                            gr.Dataframe(
	                                value=leaderboards[metric_name],
	                                datatype=leaderboard_datatypes,
	                                interactive=False,
	                            )

	        with gr.Tab("📝 About", id=1):
	            gr.Markdown(about.ABOUT_LEADERBOARD)

	# Start the Gradio server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()