Spaces:

alfraser
/

llm-arch

Runtime error

App Files Files Community

llm-arch / src /testing.py

alfraser

Added the test reporting structure

82130cb over 1 year ago

raw

history blame

6.53 kB

	from __future__ import annotations # For self-referencing annotations

	import json
	import os

	from random import choices
	from typing import List, Dict, Optional

	from src.architectures import Architecture
	from src.common import data_dir


	class TestGenerator:
	    """
	    Wrapper class to hold testing questions and serve up examples.

	    The question list is cached on the class itself, so it is read from
	    disk at most once unless a reload is explicitly requested.
	    """
	    # Cached question list; stays None until load_questions() has run
	    questions: Optional[List[str]] = None

	    @classmethod
	    def load_questions(cls, reload=False) -> None:
	        """
	        Load the available questions from the json file.
	        Default to not re-loading if already done, but allow for the option to do so

	        :param reload: when True the file is read again even if questions are cached
	        """
	        if cls.questions is not None and not reload:
	            return
	        question_file = os.path.join(data_dir, 'json', 'test_questions.json')
	        # Be explicit about the encoding so the result does not depend on the
	        # platform default (JSON files are conventionally UTF-8)
	        with open(question_file, 'r', encoding='utf-8') as f:
	            question_json = json.load(f)
	        cls.questions = question_json['questions']

	    @classmethod
	    def question_count(cls) -> int:
	        """
	        Return the number of available questions, loading them first if needed
	        """
	        cls.load_questions()
	        return len(cls.questions)

	    @classmethod
	    def get_random_questions(cls, n: int) -> List[str]:
	        """
	        Return n random questions, loading them first if needed.
	        Questions are drawn with replacement, so the same question can appear
	        more than once and n may exceed the number of available questions.
	        """
	        cls.load_questions()
	        return choices(cls.questions, k=n)


	class ArchitectureRequestRecord:
	    """
	    Representation of the test data associated with each invocation of an architecture
	    """
	    # Cache of every record built by load_all(); stays None until the first load
	    all: Optional[List[ArchitectureRequestRecord]] = None

	    class ArchStep:
	        """
	        Inner class to just hold the name and timing data of a single step
	        """
	        def __init__(self, name: str, start: int, end: int):
	            self.name = name
	            self.start = start
	            self.end = end
	            self.elapsed = end - start

	    def __init__(self, arch: str, response_len: int, start: int, end: int,
	                 elapsed: int, tags: List[str], test_group: Optional[str],
	                 comment: str, steps: List[ArchitectureRequestRecord.ArchStep]):
	        self.arch = arch
	        self.response_len = response_len
	        self.start = start
	        self.end = end
	        self.elapsed = elapsed
	        self.tags = tags
	        self.test_group = test_group
	        self.comment = comment
	        self.steps = steps

	    @classmethod
	    def from_dict(cls, test: Dict) -> ArchitectureRequestRecord:
	        """
	        Build a record from a single trace dictionary.

	        The dictionary must provide 'architecture', 'request' (with a non-empty
	        'response_evolution' list), 'trace' (with a non-empty 'steps' list whose
	        entries have 'name', 'start_ms' and 'end_ms'), 'test_tags' and
	        'test_comment'. A missing key raises KeyError and an empty list raises
	        IndexError.

	        :param test: the trace dictionary to convert
	        :return: a new instance of the class this method is called on
	        """
	        arch = test['architecture']
	        # Only the final entry of the response evolution is measured
	        response_len = len(test['request']['response_evolution'][-1])
	        trace_steps = test['trace']['steps']
	        # Overall timing spans from the first step starting to the last step ending
	        start = trace_steps[0]['start_ms']
	        end = trace_steps[-1]['end_ms']
	        tags = test['test_tags']
	        # If several tags start with "TestGroup" the last one wins
	        test_group = None
	        for tag in tags:
	            if tag.startswith("TestGroup"):
	                test_group = tag
	        comment = test['test_comment']
	        steps = [cls.ArchStep(s['name'], s['start_ms'], s['end_ms']) for s in trace_steps]
	        # Construct through cls so that subclasses get instances of their own type
	        return cls(arch, response_len, start, end, end - start, tags, test_group, comment, steps)

	    @classmethod
	    def load_all(cls, reload=False) -> None:
	        """
	        Load all the traces returned by Architecture.get_trace_records() into
	        the class level cache.

	        :param reload: when True the records are rebuilt even if already cached
	        """
	        if cls.all is not None and not reload:
	            return
	        cls.all = [cls.from_dict(trace) for trace in Architecture.get_trace_records()]


	class TestGroup:
	    """
	    A collection of architecture request records which share the same test
	    group tag, with aggregate timing and summary statistics over them
	    """
	    # Cache of test groups keyed by test group tag; stays None until load_all() has run
	    all: Optional[Dict[str, TestGroup]] = None

	    def __init__(self, test_group: str):
	        self.arch_request_records: List[ArchitectureRequestRecord] = []
	        self.test_group = test_group
	        self.comment = None
	        self.start = None
	        self.end = None
	        self.elapsed = None
	        self.architectures = set()

	    @property
	    def num_archs(self) -> int:
	        """
	        Number of distinct architectures seen in this group
	        """
	        return len(self.architectures)

	    @property
	    def num_tests(self) -> int:
	        """
	        Total number of records in this group
	        """
	        return len(self.arch_request_records)

	    @property
	    def num_tests_per_arch(self) -> int:
	        """
	        Number of records per architecture, rounded down.
	        Returns 0 for a group which has no records yet.
	        """
	        if not self.architectures:
	            # Nothing recorded yet - avoid dividing by zero
	            return 0
	        # Should always divide evenly but use integer division just in case
	        return self.num_tests // self.num_archs

	    def arch_request_records_by_arch(self) -> Dict[str, List[ArchitectureRequestRecord]]:
	        """
	        Return the records of this group keyed by architecture name, keeping
	        the order in which the records were added
	        """
	        grouped: Dict[str, List[ArchitectureRequestRecord]] = {}
	        for arr in self.arch_request_records:
	            grouped.setdefault(arr.arch, []).append(arr)
	        return grouped

	    def summary_stats_by_arch(self) -> List[Dict]:
	        """
	        Build one statistics dictionary per architecture, sorted by architecture name.

	        Each dictionary holds 'arch_name', the list of 'elapsed' values, the list
	        of 'response_len' values and a 'steps' list with the 'step_name' and
	        'mean_elapsed' of every step. The step names and the number of steps are
	        taken from the first record of the architecture; a record with fewer
	        steps than that raises IndexError.
	        """
	        by_arch = self.arch_request_records_by_arch()
	        stats = []
	        for arch_name in sorted(by_arch):
	            records = by_arch[arch_name]
	            stat_pack = {
	                'arch_name': arch_name,
	                'elapsed': [rec.elapsed for rec in records],
	                'response_len': [rec.response_len for rec in records],
	                'steps': [],
	            }
	            for i, step in enumerate(records[0].steps):
	                # Average the elapsed time of the step at this position across all records
	                total_elapsed = sum(rec.steps[i].elapsed for rec in records)
	                stat_pack['steps'].append({
	                    'step_name': step.name,
	                    'mean_elapsed': total_elapsed / len(records),
	                })
	            stats.append(stat_pack)
	        return stats

	    def add_record(self, arr: ArchitectureRequestRecord) -> None:
	        """
	        Add a record to this group and update the aggregate values.

	        The group comment is taken from the first record added. The group start
	        and end widen to cover the earliest start and latest end seen so far.

	        :param arr: the record to add
	        :raises ValueError: if the record belongs to a different test group
	        """
	        if arr.test_group != self.test_group:
	            raise ValueError("Attempted to group a test record into the wrong group")
	        self.arch_request_records.append(arr)
	        self.architectures.add(arr.arch)
	        if self.comment is None:
	            self.comment = arr.comment
	        if self.start is None or self.start > arr.start:
	            self.start = arr.start
	        if self.end is None or self.end < arr.end:
	            self.end = arr.end
	        self.elapsed = self.end - self.start

	    @classmethod
	    def load_all(cls, reload=False):
	        """
	        Build the class level cache of test groups from all of the
	        architecture request records. Records without a test group are skipped.

	        :param reload: when True the records and groups are rebuilt even if cached
	        """
	        if cls.all is not None and not reload:
	            return
	        ArchitectureRequestRecord.load_all(reload=reload)
	        groups = {}
	        for arr in ArchitectureRequestRecord.all:
	            if arr.test_group is None:
	                continue
	            if arr.test_group not in groups:
	                # Construct through cls so that subclasses get groups of their own type
	                groups[arr.test_group] = cls(arr.test_group)
	            groups[arr.test_group].add_record(arr)
	        cls.all = groups

	    @classmethod
	    def for_test_group_tag(cls, test_group_tag: str) -> TestGroup:
	        """
	        Return the test group for the given tag, loading the groups first if needed.

	        :raises KeyError: if there is no group with that tag
	        """
	        cls.load_all()
	        return cls.all[test_group_tag]