llm-arch / src /testing.py
alfraser's picture
Added the test reporting structure
82130cb
raw
history blame
6.53 kB
from __future__ import annotations # For self-referencing annotations
import json
import os
from random import choices
from typing import List, Dict, Optional
from src.architectures import Architecture
from src.common import data_dir
class TestGenerator:
    """
    Wrapper class to hold testing questions and serve up examples.

    The questions are cached on the class itself after the first load, so all
    of the methods are classmethods and no instance is needed.
    """
    # Cached list of questions; None means the json file has not been read yet
    questions: Optional[List[str]] = None

    @classmethod
    def load_questions(cls, reload=False) -> None:
        """
        Load the available questions from the json file.
        Default to not re-loading if already done, but allow for the option to do so

        :param reload: when True the file is read again even if questions are cached
        """
        if cls.questions is not None and not reload:
            return
        question_file = os.path.join(data_dir, 'json', 'test_questions.json')
        # Be explicit about the encoding: without it open() uses the platform
        # locale encoding, which can mis-decode non-ascii questions
        with open(question_file, 'r', encoding='utf-8') as f:
            question_json = json.load(f)
        cls.questions = question_json['questions']

    @classmethod
    def question_count(cls) -> int:
        """
        Return the number of available questions, loading them first if needed
        """
        cls.load_questions()
        return len(cls.questions)

    @classmethod
    def get_random_questions(cls, n: int) -> List[str]:
        """
        Return n random questions.
        The selection is made with replacement (random.choices), so the same
        question can appear more than once and n can be larger than the number
        of available questions
        """
        cls.load_questions()
        return choices(cls.questions, k=n)
class ArchitectureRequestRecord:
    """
    Representation of the test data associated with each invocation of an architecture
    """
    # Cache of every record built by load_all; None until the first load
    all: Optional[List[ArchitectureRequestRecord]] = None

    class ArchStep:
        """
        Inner class to just hold the timing data for a single step of a trace
        """
        def __init__(self, name: str, start: int, end: int):
            self.name = name
            self.start = start
            self.end = end
            self.elapsed = end - start

    def __init__(self, arch: str, response_len: int, start: int, end: int,
                 elapsed: int, tags: List[str], test_group: Optional[str],
                 comment: str, steps: List[ArchitectureRequestRecord.ArchStep]):
        """
        Store the supplied values as-is; see from_dict for how they are
        derived from a raw trace record
        """
        self.arch = arch
        self.response_len = response_len
        self.start = start
        self.end = end
        self.elapsed = elapsed
        self.tags = tags
        self.test_group = test_group
        self.comment = comment
        self.steps = steps

    @classmethod
    def from_dict(cls, test: Dict) -> ArchitectureRequestRecord:
        """
        Build a record from one trace dictionary.

        Keys read from the dictionary:
          - 'architecture': the architecture name
          - 'request' -> 'response_evolution': the length of its last entry is the response length
          - 'trace' -> 'steps': list of dicts each having 'name', 'start_ms' and 'end_ms'
          - 'test_tags': list of string tags
          - 'test_comment': the comment

        The record starts at the first step's start_ms and ends at the last
        step's end_ms. A missing key raises KeyError and an empty steps or
        response_evolution list raises IndexError.
        """
        trace_steps = test['trace']['steps']
        arch = test['architecture']
        response_len = len(test['request']['response_evolution'][-1])
        start = trace_steps[0]['start_ms']
        end = trace_steps[-1]['end_ms']
        tags = test['test_tags']
        # If more than one tag starts with "TestGroup" the last one is used
        group_tags = [tag for tag in tags if tag.startswith("TestGroup")]
        test_group = group_tags[-1] if group_tags else None
        steps = [cls.ArchStep(s['name'], s['start_ms'], s['end_ms']) for s in trace_steps]
        # Construct through cls so that a subclass gets instances of itself
        return cls(arch, response_len, start, end, end - start, tags,
                   test_group, test['test_comment'], steps)

    @classmethod
    def load_all(cls, reload=False) -> None:
        """
        Load all the traces from json trace log

        :param reload: when True the records are rebuilt even if already cached
        """
        if cls.all is not None and not reload:
            return
        cls.all = [cls.from_dict(trace) for trace in Architecture.get_trace_records()]
class TestGroup:
    """
    Aggregation of all the architecture request records which share a test group tag
    """
    # Cache of the test groups keyed by test group tag; None until load_all has run
    all: Optional[Dict[str, TestGroup]] = None

    def __init__(self, test_group: str):
        self.arch_request_records: List[ArchitectureRequestRecord] = []
        self.test_group = test_group
        # The following are filled in as records get added via add_record
        self.comment = None
        self.start = None
        self.end = None
        self.elapsed = None
        self.architectures = set()

    @property
    def num_archs(self) -> int:
        """Number of distinct architectures seen in the added records"""
        return len(self.architectures)

    @property
    def num_tests(self) -> int:
        """Total number of records added to the group"""
        return len(self.arch_request_records)

    @property
    def num_tests_per_arch(self) -> int:
        """
        Number of tests per architecture, or 0 for a group with no records
        """
        if self.num_archs == 0:
            # Nothing has been added yet so avoid dividing by zero
            return 0
        # Should always divide evenly but use integer division just in case
        return self.num_tests // self.num_archs

    def arch_request_records_by_arch(self) -> Dict[str, List[ArchitectureRequestRecord]]:
        """
        Return the records grouped by architecture name, each list kept in the
        order the records were added
        """
        grouped = {}
        for arr in self.arch_request_records:
            grouped.setdefault(arr.arch, []).append(arr)
        return grouped

    def summary_stats_by_arch(self) -> List[Dict]:
        """
        Return one stats dictionary per architecture, sorted by architecture name.

        Each dictionary holds 'arch_name', the per-record 'elapsed' and
        'response_len' lists, and 'steps': a list of dictionaries with the
        'step_name' and the 'mean_elapsed' across the architecture's records.
        """
        arch_records = self.arch_request_records_by_arch()
        stats = []
        for arch_name in sorted(arch_records):
            recs = arch_records[arch_name]
            step_stats = []
            # The step list of the first record drives the layout; every other
            # record of the architecture needs at least as many steps or the
            # positional lookup raises IndexError
            for i, first_step in enumerate(recs[0].steps):
                total_elapsed = sum(rec.steps[i].elapsed for rec in recs)
                step_stats.append({'step_name': first_step.name,
                                   'mean_elapsed': total_elapsed / len(recs)})
            stats.append({'arch_name': arch_name,
                          'elapsed': [rec.elapsed for rec in recs],
                          'response_len': [rec.response_len for rec in recs],
                          'steps': step_stats})
        return stats

    def add_record(self, arr: ArchitectureRequestRecord) -> None:
        """
        Add a record to the group and update the group level details.

        The comment is taken from the first record added; start / end widen to
        cover the earliest start and latest end of all the records.

        :raises ValueError: if the record's test group is not this group
        """
        if arr.test_group != self.test_group:
            raise ValueError("Attempted to group a test record into the wrong group")
        self.arch_request_records.append(arr)
        self.architectures.add(arr.arch)
        if self.comment is None:
            self.comment = arr.comment
        if self.start is None or self.start > arr.start:
            self.start = arr.start
        if self.end is None or self.end < arr.end:
            self.end = arr.end
        self.elapsed = self.end - self.start

    @classmethod
    def load_all(cls, reload=False) -> None:
        """
        Build the test groups from all of the architecture request records.
        Records without a test group are skipped.

        :param reload: when True the records and groups are rebuilt even if cached
        """
        if cls.all is not None and not reload:
            return
        ArchitectureRequestRecord.load_all(reload=reload)
        groups = {}
        for arr in ArchitectureRequestRecord.all:
            if arr.test_group is None:
                continue
            if arr.test_group not in groups:
                groups[arr.test_group] = cls(arr.test_group)
            groups[arr.test_group].add_record(arr)
        cls.all = groups

    @classmethod
    def for_test_group_tag(cls, test_group_tag: str) -> TestGroup:
        """
        Return the group for the given tag, loading the groups first if needed.

        :raises KeyError: if there is no group with that tag
        """
        cls.load_all()
        return cls.all[test_group_tag]