Spaces:
Runtime error
Runtime error
Added a test runner page which allows you to run a batch of tests from the UI
Browse files- pages/030_Test_Runner.py +45 -0
pages/030_Test_Runner.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from src.architectures import *
|
4 |
+
from src.common import generate_group_tag
|
5 |
+
from src.testing import TestGenerator
|
6 |
+
from src.st_helpers import st_setup
|
7 |
+
|
8 |
+
|
9 |
+
def _execute_batch(arch_names, question_total, group_tag, note, expected_total):
    """Run each selected architecture over a batch of questions.

    A progress bar is created first, then ``question_total`` questions are
    requested from ``TestGenerator.get_random_questions``.  Every architecture
    named in ``arch_names`` is looked up and called once per question, with
    the trace tagged ``[group_tag, "TestRunner"]`` and commented with ``note``.
    The bar is advanced after each call and cleared once ``expected_total``
    calls have completed.
    """
    bar = st.progress(0.0, text="Running tests...")
    batch = TestGenerator.get_random_questions(question_total)
    finished = 0
    for name in arch_names:
        runner = Architecture.get_architecture(name)
        for question in batch:
            runner(
                ArchitectureRequest(question),
                trace_tags=[group_tag, "TestRunner"],
                trace_comment=note,
            )
            finished += 1
            # NOTE(review): the bar is only cleared if the number of questions
            # returned matches question_total - confirm with TestGenerator.
            if finished != expected_total:
                bar.progress(
                    finished / expected_total,
                    f"Run {finished} of {expected_total} tests...",
                )
            else:
                bar.empty()


def _render_test_runner():
    """Draw the test-runner form and start a batch when Run is pressed."""
    st.write("# Test Runner")
    st.write("## Run a new test")

    st.write("### Comment:")
    note = st.text_input("Optional comment for the test")

    st.write("### Architectures to include:")
    arch_options = [arch.name for arch in Architecture.architectures]
    chosen = st.multiselect(label="Architectures", options=arch_options)

    st.write("### Number of questions to ask:")
    question_limit = TestGenerator.question_count()
    question_total = st.slider(
        label="Number of questions",
        min_value=1,
        max_value=question_limit,
        step=1,
    )

    st.write("### Tag:")
    # NOTE(review): Streamlit re-executes the page on every widget
    # interaction, so this is called again on each rerun; if the helper is not
    # deterministic the tag shown before pressing Run may differ from the one
    # actually used - confirm.
    group_tag = generate_group_tag()
    st.write(f'Test will be tagged as "{group_tag}" - record this for easy searching later')

    expected_total = len(chosen) * question_total
    st.write("### Run:")
    st.write(f"**{expected_total}** total tests will be run")

    # With no architecture selected there is nothing to run, so the button is
    # disabled.
    nothing_to_run = expected_total == 0
    if st.button("**Run**", disabled=nothing_to_run):
        _execute_batch(chosen, question_total, group_tag, note, expected_total)


# Load the architectures only if the class-level collection is still unset.
if Architecture.architectures is None:
    Architecture.load_architectures()

# The page body is only rendered when st_setup returns a truthy value.
if st_setup('LLM Arch'):
    with st.container():
        _render_test_runner()
|