Spaces:

alfraser
/

llm-arch

Runtime error

App Files Files Community

llm-arch / pages /040_Test_Reporter.py

alfraser

Moved the trace reload behind the admin screen and login

f89cac3 9 months ago

raw

history blame

4.6 kB

	import pandas as pd
	import plotly.express as px
	import streamlit as st

	from src.architectures import *
	from src.st_helpers import st_setup
	from src.testing import TestGroup


	def show_stats(for_test_group: str):
	    """
	    Render the report for a single test group: a title and headline counts,
	    four summary charts and the answers each architecture gave per question.

	    The work is structured as a set of nested functions, one per display
	    element. Each takes the list returned by
	    TestGroup.summary_stats_by_arch(), of which this function reads the keys
	    'arch_name', 'elapsed', 'response_len', 'steps' (entries with 'step_name'
	    and 'mean_elapsed') and 'q_and_a' (question -> answer).

	    :param for_test_group: tag of the test group to report on, as accepted by
	        TestGroup.for_test_group_tag
	    """
	    def render_chart(fig):
	        # Finishing steps shared by every chart: vertical x tick labels and
	        # stretching the figure to the width of the enclosing container
	        fig.update_xaxes(tickangle=-90)
	        st.plotly_chart(fig, use_container_width=True)

	    def show_elapsed_time_in_seconds_boxplot(stats):
	        # One box per architecture over its individual elapsed times.
	        # Values are divided by 1000 for display as seconds
	        # (presumably recorded in milliseconds - confirm against TestGroup)
	        with st.expander("Elapsed End to End Time (seconds)"):
	            data = [
	                [arch['arch_name'], e / 1000]
	                for arch in stats
	                for e in arch['elapsed']
	            ]
	            df = pd.DataFrame(data, columns=['Architecture', 'Elapsed time'])
	            render_chart(px.box(df, x="Architecture", y="Elapsed time"))

	    def show_response_length_boxplot(stats):
	        # One box per architecture over its individual response lengths
	        with st.expander("Response length (count of characters)"):
	            data = [
	                [arch['arch_name'], rl]
	                for arch in stats
	                for rl in arch['response_len']
	            ]
	            df = pd.DataFrame(data, columns=['Architecture', 'Response length'])
	            render_chart(px.box(df, x="Architecture", y="Response length"))

	    def show_elapsed_time_by_arch_step_stacked_bar(stats):
	        # One stacked bar per architecture, one segment per step
	        with st.expander("Mean elapsed time by architecture step (seconds)"):
	            data = [
	                [arch['arch_name'], step['step_name'], step['mean_elapsed'] / 1000]
	                for arch in stats
	                for step in arch['steps']
	            ]
	            df = pd.DataFrame(data, columns=['Architecture', 'Step', 'Mean elapsed time'])
	            render_chart(px.bar(df, x='Architecture', y='Mean elapsed time', color='Step', barmode='stack'))

	    def show_time_vs_response_length_scatter_plot(stats):
	        # One point per (elapsed, response length) pair, coloured by
	        # architecture; zip stops at the shorter of the two lists
	        with st.expander("Time by response length (seconds/char)"):
	            data = [
	                [arch['arch_name'], elapsed / 1000, resp_len]
	                for arch in stats
	                for elapsed, resp_len in zip(arch['elapsed'], arch['response_len'])
	            ]
	            df = pd.DataFrame(data, columns=['Architecture', 'Elapsed time', 'Response length'])
	            render_chart(px.scatter(df, x='Elapsed time', y='Response length', color='Architecture'))

	    def show_q_and_a_detail(stats):
	        # Gather the questions from every architecture rather than only the
	        # first one: an empty stats list then yields no questions instead of
	        # an IndexError, and a question missing from the first architecture
	        # is still listed. dict.fromkeys drops duplicates while keeping
	        # first-seen order, so the order is unchanged when all architectures
	        # answered the same questions.
	        questions = list(dict.fromkeys(q for arch in stats for q in arch['q_and_a']))
	        num_archs = len(stats)
	        # NOTE(review): the source indentation was lost; the per-question
	        # expanders are assumed to sit inside this outer expander. Some
	        # Streamlit versions reject nested expanders - confirm.
	        with st.expander("Request/Response Details"):
	            print(f'Displaying {len(questions)} questions and {num_archs} architectures')
	            for q in questions:
	                with st.expander(f"{q}"):
	                    for arch in stats:
	                        st.divider()
	                        # An architecture may lack an answer for a question
	                        answer = arch['q_and_a'].get(q, "No answer - test run possibly interrupted")
	                        st.write(f"{arch['arch_name']}\n{answer}")

	    test_group = TestGroup.for_test_group_tag(for_test_group)
	    title = "No comment provided for group" if test_group.comment == "" else test_group.comment
	    st.write(f"### {title}")
	    st.write(
	        f"Total of {test_group.num_tests} tests over {test_group.num_archs} architectures "
	        f"({test_group.num_tests_per_arch} per architecture)."
	    )

	    stats = test_group.summary_stats_by_arch()

	    st.write("#### Statistics")
	    show_elapsed_time_in_seconds_boxplot(stats)
	    show_response_length_boxplot(stats)
	    show_elapsed_time_by_arch_step_stacked_bar(stats)
	    show_time_vs_response_length_scatter_plot(stats)

	    st.write("#### Question and answer details")
	    show_q_and_a_detail(stats)


	# Page body: only rendered when st_setup returns a truthy value.
	# NOTE(review): the source indentation was lost; the statements after the
	# title are assumed to sit directly under the `if` - confirm.
	if st_setup('LLM Arch'):
	    with st.container():
	        st.write("# Test Reporter")

	    TestGroup.load_all()

	    # Left column holds the test group picker, right column the report
	    selector, display = st.columns([2, 3])

	    with selector:
	        # Order by descending start value via the negated sort key
	        test_groups = sorted(TestGroup.all.values(), key=lambda tg: -tg.start)
	        options = []
	        for tg in test_groups:
	            options.append(f'{tg.test_group}: {tg.comment}')
	        # index=None leaves the radio unselected until the user picks one
	        selected = st.radio('Pick a test set to review', options=options, index=None)
	        if selected:
	            # The label starts with the group tag, up to the first colon
	            tag = selected.partition(":")[0]
	            with display:
	                show_stats(tag)