Spaces:

Intel
/

powered_by_intel_llm_leaderboard

Running

powered_by_intel_llm_leaderboard / app.py

Benjamin Consolvo

doc updates

491fabd 5 months ago

13.7 kB

	import gradio as gr
	import pandas as pd
	import requests


	from info.train_a_model import (
	LLM_BENCHMARKS_TEXT)
	from info.submit import (
	SUBMIT_TEXT)
	from info.deployment import (
	DEPLOY_TEXT)
	from info.programs import (
	PROGRAMS_TEXT)
	from info.citation import(
	CITATION_TEXT)
	from info.validated_chat_models import(
	VALIDATED_CHAT_MODELS)
	from src.processing import filter_benchmarks_table

	#inference_endpoint_url = os.environ['inference_endpoint_url']
	#inference_concurrency_limit = os.environ['inference_concurrency_limit']

	# Option vocabularies shared by the leaderboard filters, their default
	# selections, and the initial table render. Keeping one definition per
	# vocabulary prevents the defaults from drifting away from the selectable
	# choices (a default that is not a listed choice cannot be reproduced by the
	# user and makes the first render differ from every later one).
	# NOTE(review): these labels are what the filter checkboxes send to
	# filter_benchmarks_table. Confirm they match the values stored in the status
	# CSV -- earlier defaults used "Intel Software Liftoff", "GCP" and "4bit".
	HW_PLATFORMS = ("Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra")
	TRAINING_INFRA = ("Intel Developer Cloud", "AWS", "Azure", "Google Cloud Platform", "Local")
	PROGRAM_AFFILIATIONS = ("No Affiliation", "Intel Innovator", "Intel Student Ambassador",
	                        "Intel Liftoff", "Intel Labs", "Other")
	MODEL_SIZES = (1, 3, 5, 7, 13, 35, 60, 70, 100)
	MODEL_PRECISIONS = ("fp32", "fp16", "bf16", "int8", "fp8", "int4")
	MODEL_TYPES = ("pretrained", "fine-tuned", "chat-models", "merges/moerges")

	demo = gr.Blocks()

	# NOTE(review): the nesting below was reconstructed from a flattened copy of
	# this script -- confirm the Row/Column/Tab layout against the deployed app.
	with demo:

	    # ---- Page header -------------------------------------------------------
	    gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
	    gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs
	that have been pre-trained or fine-tuned on Intel Hardware 🦾. To submit your model for evaluation,
	follow the instructions and complete the form in the 🏎️ Submit tab. Models submitted to the leaderboard are evaluated
	on the Intel Developer Cloud ☁️. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
	the [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
	    gr.Markdown("""Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and
	talk about everything from GenAI, HPC, to Quantum Computing.""")
	    gr.Markdown("""A special shout-out to the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
	team for generously sharing their code and best
	practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")

	    # ---- Chat with a leaderboard model --------------------------------------
	    with gr.Accordion("Chat with Top Models on the Leaderboard Here 💬", open=False):

	        chat_model_dropdown = gr.Dropdown(
	            choices=VALIDATED_CHAT_MODELS,
	            label="Select a leaderboard model to chat with. ",
	            multiselect=False,
	            value=VALIDATED_CHAT_MODELS[0],
	            interactive=True,
	        )

	        # The dropdown is not wired to any handler yet, so the model is pinned
	        # here. The streaming implementation below is disabled and kept only
	        # as a reference for when the inference endpoint is available.
	        #chat_model_selection = chat_model_dropdown.value
	        chat_model_selection = 'Intel/neural-chat-7b-v1-1'

	        #def call_api_and_stream_response(query, chat_model):
	        #    """
	        #    Call the API endpoint and yield characters as they are received.
	        #    This function simulates streaming by yielding characters one by one.
	        #    """
	        #    url = "http://localhost:5004/query-stream/"
	        #    params = {"query": query,"selected_model":chat_model}
	        #    with requests.get(url, json=params, stream=True) as r:
	        #        for chunk in r.iter_content(chunk_size=1):
	        #            if chunk:
	        #                yield chunk.decode()
	        #
	        #def get_response(query, history):
	        #    """
	        #    Wrapper function to call the streaming API and compile the response.
	        #    """
	        #    response = ''
	        #
	        #    global chat_model_selection
	        #
	        #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
	        #        if char == '<':
	        #            break
	        #        response += char
	        #        yield response
	        #
	        #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=5).launch()

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:

	        # ---- Leaderboard tab: filters + results table -----------------------
	        with gr.TabItem("🏆 LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
	            with gr.Row():
	                with gr.Column():
	                    filter_hw = gr.CheckboxGroup(choices=list(HW_PLATFORMS),
	                                                 label="Select Training Platform*",
	                                                 elem_id="compute_platforms",
	                                                 value=list(HW_PLATFORMS))
	                    filter_platform = gr.CheckboxGroup(choices=list(TRAINING_INFRA),
	                                                       label="Training Infrastructure*",
	                                                       elem_id="training_infra",
	                                                       value=list(TRAINING_INFRA))
	                    filter_affiliation = gr.CheckboxGroup(choices=list(PROGRAM_AFFILIATIONS),
	                                                          label="Intel Program Affiliation",
	                                                          elem_id="program_affiliation",
	                                                          value=list(PROGRAM_AFFILIATIONS))

	                with gr.Column():
	                    filter_size = gr.CheckboxGroup(choices=list(MODEL_SIZES),
	                                                   label="Model Sizes (Billion of Parameters)",
	                                                   elem_id="parameter_size",
	                                                   value=list(MODEL_SIZES))
	                    filter_precision = gr.CheckboxGroup(choices=list(MODEL_PRECISIONS),
	                                                        label="Model Precision",
	                                                        elem_id="precision",
	                                                        value=list(MODEL_PRECISIONS))
	                    filter_type = gr.CheckboxGroup(choices=list(MODEL_TYPES),
	                                                   label="Model Types",
	                                                   elem_id="model_types",
	                                                   value=list(MODEL_TYPES))

	            # Loaded once at start-up; every filter change re-filters this frame.
	            initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")

	            def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
	                """Return the leaderboard table restricted to the selected filter values.

	                Thin wrapper that forwards the current selections, together with
	                the module-level ``initial_df``, to ``filter_benchmarks_table``.

	                Args:
	                    hw_selected: Values selected in the training-platform filter.
	                    platform_selected: Values selected in the training-infrastructure filter.
	                    affiliation_selected: Values selected in the program-affiliation filter.
	                    size_selected: Values selected in the model-size filter.
	                    precision_selected: Values selected in the precision filter.
	                    type_selected: Values selected in the model-type filter.

	                Returns:
	                    Whatever ``filter_benchmarks_table`` returns; it is used as the
	                    value of the results ``gr.Dataframe``.
	                """
	                return filter_benchmarks_table(
	                    df=initial_df,
	                    hw_selected=hw_selected,
	                    platform_selected=platform_selected,
	                    affiliation_selected=affiliation_selected,
	                    size_selected=size_selected,
	                    precision_selected=precision_selected,
	                    type_selected=type_selected,
	                )

	            # First render uses exactly the default checkbox selections so the
	            # table does not change when a filter is toggled off and on again.
	            initial_filtered_df = update_df(list(HW_PLATFORMS),
	                                            list(TRAINING_INFRA),
	                                            list(PROGRAM_AFFILIATIONS),
	                                            list(MODEL_SIZES),
	                                            list(MODEL_PRECISIONS),
	                                            list(MODEL_TYPES))

	            gradio_df_display = gr.Dataframe(value=initial_filtered_df)

	            # Any filter change re-runs update_df with the state of all six
	            # filters (order matches update_df's positional parameters).
	            filter_components = [filter_hw, filter_platform, filter_affiliation,
	                                 filter_size, filter_precision, filter_type]
	            for filter_component in filter_components:
	                filter_component.change(fn=update_df,
	                                        inputs=filter_components,
	                                        outputs=[gradio_df_display])

	        # ---- Static information tabs ----------------------------------------
	        with gr.TabItem("🧰 Train a Model", elem_id="getting-started", id=1):
	            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
	        with gr.TabItem("🚀 Deployment Tips", elem_id="deployment-tips", id=2):
	            gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
	        with gr.TabItem("👩‍💻 Developer Programs", elem_id="hardward-program", id=3):
	            gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")

	        # ---- Submission form --------------------------------------------------
	        # NOTE(review): no click handler is attached to submit_button in this
	        # file, and the "Model type" / "Affiliation with Intel" labels below
	        # differ from the leaderboard filter labels ("chat models" vs
	        # "chat-models", "Innovator" vs "Intel Innovator"). Left as-is; confirm
	        # which spelling the submission pipeline expects.
	        with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
	            gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
	            with gr.Row():
	                gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
	            with gr.Row():
	                with gr.Column():
	                    model_name_textbox = gr.Textbox(label="Model name",
	                                                    info = """ Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",)
	                    revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
	                    model_type = gr.Dropdown(
	                        choices=["pretrained","fine-tuned","chat models","merges/moerges"],
	                        label="Model type",
	                        multiselect=False,
	                        value="pretrained",
	                        interactive=True,
	                    )

	                    hw_type = gr.Dropdown(
	                        choices=list(HW_PLATFORMS),
	                        label="Training Hardware",
	                        multiselect=False,
	                        value="Gaudi",
	                        interactive=True,
	                    )
	                    terms = gr.Checkbox(
	                        # Adjacent literals instead of a backslash continuation,
	                        # which would embed the next line's indentation in the label.
	                        label="Check if you have read and agreed to terms and conditions associated with submitting "
	                              "a model to the leaderboard.",
	                        value=False,
	                        interactive=True,
	                    )
	                    submit_button = gr.Button("🤗 Submit Eval 💻")
	                    submission_result = gr.Markdown()
	                with gr.Column():
	                    precision = gr.Dropdown(
	                        choices=list(MODEL_PRECISIONS),
	                        label="Precision",
	                        multiselect=False,
	                        value="fp16",
	                        interactive=True,
	                    )
	                    weight_type = gr.Dropdown(
	                        choices=["Original", "Adapter", "Delta"],
	                        label="Weights type",
	                        multiselect=False,
	                        value="Original",
	                        interactive=True,
	                        info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have
	LoRA Adapters or Delta Weights.""",
	                    )
	                    training_infra = gr.Dropdown(
	                        choices=list(TRAINING_INFRA),
	                        label="Training Infrastructure",
	                        multiselect=False,
	                        value="Intel Developer Cloud",
	                        interactive=True,
	                        info = """ Select the infrastructure that the model was developed on.
	Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
	                    )
	                    affiliation = gr.Dropdown(
	                        choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
	                        label="Affiliation with Intel",
	                        multiselect=False,
	                        value="No Affiliation",
	                        interactive=True,
	                        info = """ Select "No Affiliation" if not part of any Intel programs.""",
	                    )
	                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

	            # gr.Markdown("Community Submissions Coming soon!")

	    # ---- Citation and legal footer -------------------------------------------
	    with gr.Accordion("📙 Citation", open=False):
	        citation = gr.Textbox(value=CITATION_TEXT,
	                              lines=6,
	                              label="Use the following to cite this content")

	    gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries.
	*Other names and brands may be claimed as the property of others.
	</p> </div>""")

	# Start the Gradio server; share=False keeps the app off a public share link.
	demo.launch(share=False)