Spaces:
Runtime error
Runtime error
File size: 4,893 Bytes
82130cb 5044033 82130cb 5044033 82130cb a9d1d49 82130cb a9d1d49 943d243 a9d1d49 943d243 a9d1d49 82130cb 9cec719 a9d1d49 5044033 9cec719 a9d1d49 9cec719 5044033 f89cac3 82130cb 5ea3cc9 82130cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import pandas as pd
import plotly.express as px
import streamlit as st
from src.architectures import *
from src.st_helpers import st_setup
from src.testing import TestGroup
def show_stats(for_test_group: str) -> None:
    """
    Render the report for one test group: a title, a one-line summary of the
    test counts, four statistics charts and the per-question answers.

    The work is split into a set of nested functions, one per display element.
    Each of them receives the list returned by summary_stats_by_arch(), which is
    read here as one dict per architecture with the keys 'arch_name', 'elapsed',
    'response_len', 'steps' (dicts with 'step_name' and 'mean_elapsed') and
    'q_and_a' (question -> answer).

    :param for_test_group: the tag handed to TestGroup.for_test_group_tag
    """

    def show_elapsed_time_in_seconds_boxplot(stats):
        """Box plot of every elapsed time sample, one box per architecture."""
        with st.expander("**Elapsed End to End Time (seconds)**"):
            # Divided by 1000 for display as seconds - presumably the raw values
            # are milliseconds; confirm against TestGroup
            data = [[arch['arch_name'], e / 1000] for arch in stats for e in arch['elapsed']]
            df = pd.DataFrame(data, columns=['Architecture', 'Elapsed time'])
            fig = px.box(df, x="Architecture", y="Elapsed time")
            fig.update_xaxes(tickangle=-90)
            st.plotly_chart(fig, use_container_width=True)

    def show_response_length_boxplot(stats):
        """Box plot of every response length sample, one box per architecture."""
        with st.expander("**Response length (count of characters)**"):
            data = [[arch['arch_name'], rl] for arch in stats for rl in arch['response_len']]
            df = pd.DataFrame(data, columns=['Architecture', 'Response length'])
            fig = px.box(df, x="Architecture", y="Response length")
            fig.update_xaxes(tickangle=-90)
            st.plotly_chart(fig, use_container_width=True)

    def show_elapsed_time_by_arch_step_stacked_bar(stats):
        """Stacked bar per architecture, one segment per step's mean elapsed time."""
        with st.expander("**Mean elapsed time by architecture step (seconds)**"):
            data = [
                [arch['arch_name'], step['step_name'], step['mean_elapsed'] / 1000]
                for arch in stats
                for step in arch['steps']
            ]
            df = pd.DataFrame(data, columns=['Architecture', 'Step', 'Mean elapsed time'])
            fig = px.bar(df, x='Architecture', y='Mean elapsed time', color='Step', barmode='stack')
            fig.update_xaxes(tickangle=-90)
            st.plotly_chart(fig, use_container_width=True)

    def show_time_vs_response_length_scatter_plot(stats):
        """Scatter of elapsed time against response length, coloured by architecture."""
        with st.expander("**Time by response length (seconds/char)**"):
            # zip pairs the two lists by position and stops at the shorter one
            data = [
                [arch['arch_name'], elapsed / 1000, resp_len]
                for arch in stats
                for elapsed, resp_len in zip(arch['elapsed'], arch['response_len'])
            ]
            df = pd.DataFrame(data, columns=['Architecture', 'Elapsed time', 'Response length'])
            # Reserve the chart's slot first so that the chart is drawn above the
            # checkbox even though the checkbox has to be read before plotting
            chart_area = st.container()
            if st.checkbox("Show regression lines"):
                fig = px.scatter(df, x='Elapsed time', y='Response length', color='Architecture', trendline='ols', trendline_color_override='red')
            else:
                fig = px.scatter(df, x='Elapsed time', y='Response length', color='Architecture')
            fig.update_xaxes(tickangle=-90)
            with chart_area:
                st.plotly_chart(fig, use_container_width=True)

    def show_q_and_a_detail(stats):
        """List each question with the answer given by every architecture."""
        # Gather the questions from every architecture (first-seen order, no
        # duplicates) rather than from the first one only: this copes with an
        # empty stats list and with a first architecture whose run was cut short
        questions = list(dict.fromkeys(q for arch in stats for q in arch['q_and_a']))
        num_archs = len(stats)
        with st.expander("**Request/Response Details**"):
            print(f'Displaying {len(questions)} questions and {num_archs} architectures')
            for q in questions:
                # NOTE(review): Streamlit has historically refused an expander nested
                # inside another expander - confirm the deployed version allows it
                with st.expander(f"**{q}**"):
                    for arch in stats:
                        st.divider()
                        answer = arch['q_and_a'].get(q, "No answer - test run possibly interrupted")
                        st.write(f"**{arch['arch_name']}**\n{answer}")

    test_group = TestGroup.for_test_group_tag(for_test_group)
    title = "No comment provided for group" if test_group.comment == "" else test_group.comment
    st.write(f"### {title}")
    st.write(f"Total of {test_group.num_tests} tests over {test_group.num_archs} architectures ({test_group.num_tests_per_arch} per architecture).")
    stats = test_group.summary_stats_by_arch()

    st.write("#### Statistics")
    show_elapsed_time_in_seconds_boxplot(stats)
    show_response_length_boxplot(stats)
    show_elapsed_time_by_arch_step_stacked_bar(stats)
    show_time_vs_response_length_scatter_plot(stats)

    st.write("#### Question and answer details")
    show_q_and_a_detail(stats)
# Page body: only drawn when st_setup returns a truthy value
if st_setup('LLM Arch'):
    summary = st.container()
    with summary:
        st.write("# Test Reporter")

    TestGroup.load_all()
    selector, display = st.columns([2, 3])

    with selector:
        # Order the groups by descending start value (negated key, stable sort)
        test_groups = sorted(TestGroup.all.values(), key=lambda tg: -tg.start)
        options = [f'{tg.test_group}: {tg.comment}' for tg in test_groups]
        # index=None means nothing is pre-selected, so the radio returns None
        # until the user picks an entry
        selected = st.radio('**Pick a test set to review**', options=options, index=None)

    if selected:
        with display:
            # The text before the first ':' of the option is the test group tag
            show_stats(selected.partition(":")[0])
|