Spaces:

loveblairsky
/

LLM-model-cards

Running

LLM-model-cards / Sample.py

Blair Yang

now able to record responses

91143ec 5 months ago

No virus

3.28 kB

	import random
	import numpy as np
	import os
	import json
	from Config import *
	import pandas as pd

	def format_card_str(card):
	    """Render every criterion of a model card as a bullet-style string.

	    Args:
	        card: Mapping from criterion name to either a plain string or a
	            dict holding the keys 'overview', 'thinking_pattern',
	            'strength' and 'weakness'.

	    Returns:
	        A list with one formatted string per kept criterion, in the
	        iteration order of ``card``. A dict-valued criterion whose three
	        detail fields concatenate to '' is dropped entirely (its overview
	        line is not emitted either).

	    Raises:
	        ValueError: If a value is neither a ``str`` nor a ``dict``.
	    """
	    formatted = []
	    for name, value in card.items():
	        # Plain-text criteria become a single bullet line.
	        if isinstance(value, str):
	            formatted.append(f'- {name}: {value}\n')
	            continue

	        if not isinstance(value, dict):
	            raise ValueError(f'Unknown type: {type(value)}')

	        headline = f"- {name}: {value['overview']}\n"
	        pattern = value['thinking_pattern']
	        strength = value['strength']
	        weakness = value['weakness']

	        # A criterion without any detail text is skipped as a whole.
	        if pattern + strength + weakness == '':
	            continue

	        formatted.append(
	            headline
	            + f" - Thinking Patterns: {pattern}\n"
	            + f" - Strength: {strength}\n"
	            + f" - Weakness: {weakness}\n"
	        )
	    return formatted

	def format_qa_entry(qa):
	    """Format a multiple-choice question as display text.

	    Args:
	        qa: Mapping with the keys 'question' (text), 'choices' (sequence of
	            option texts) and 'ground truth' (zero-based index of the
	            correct option).

	    Returns:
	        The question, the options labelled 'A.', 'B.', ... one per line,
	        and a 'Ground Truth: <letter>' line, separated by blank lines.
	    """
	    question = qa['question']
	    choices = qa['choices']
	    answer_index = qa['ground truth']

	    # Option positions 0..n-1 map to the letters A, B, C, ... (65 == ord('A')).
	    labelled = [f"{chr(65 + pos)}. {text}" for pos, text in enumerate(choices)]
	    options_block = '\n'.join(labelled)

	    answer_line = f'Ground Truth: {chr(65 + answer_index)}'
	    return '\n\n'.join((question, options_block, answer_line))


	def sample_random_entry(dataset='', topic='', model='', n=1):
	    """Draw one random card/question pair and prepare it for display.

	    Args:
	        dataset: Dataset name; '' picks one at random from ``DATASETS``.
	        topic: Topic name; '' picks one at random from ``TOPICS[dataset]``.
	        model: Model name; '' picks one at random from ``MODELS``.
	        n: Accepted for interface compatibility; not used by this function.

	    Returns:
	        The ``(display_dict, info_dict)`` pair built by
	        ``process_for_display``, with ``info_dict['index']`` set to the
	        index returned by ``sample_QA_entry``.
	    """
	    # Fill in unspecified selectors. The order matters: the topic pool
	    # depends on the dataset that was (possibly randomly) chosen.
	    dataset = random.choice(DATASETS) if dataset == '' else dataset
	    topic = random.choice(TOPICS[dataset]) if topic == '' else topic
	    model = random.choice(MODELS) if model == '' else model

	    cards = sample_card(dataset, topic, model)
	    question, row_index = sample_QA_entry(dataset, topic, model)

	    shown, meta = process_for_display(cards, question)
	    meta['index'] = row_index
	    return shown, meta


	def process_for_display(card_lst, qa):
	    """Split a sampled card and question into display text and metadata.

	    Args:
	        card_lst: List of formatted card entry strings.
	        qa: Question mapping; must contain 'question' and 'choices' along
	            with whatever ``format_qa_entry`` reads.

	    Returns:
	        A ``(display_dict, info_dict)`` tuple. ``display_dict`` has the keys
	        'card' (text chosen by ``select_entry``) and 'qa' (the formatted
	        question). ``info_dict`` is a shallow copy of ``qa`` without the
	        'question' and 'choices' keys; ``qa`` itself is not modified.
	    """
	    qa_text = format_qa_entry(qa)
	    shown = {
	        'card': select_entry(qa_text, card_lst),
	        'qa': qa_text,
	    }

	    # Everything except the text already shown is kept as metadata.
	    meta = dict(qa)
	    del meta['question']
	    del meta['choices']

	    return shown, meta



	def select_entry(qa_entry, card_lst):
	    """Choose the card text to show next to a question.

	    Args:
	        qa_entry: Formatted question text; currently unused.
	        card_lst: List of formatted card entry strings.

	    Returns:
	        The first two entries of ``card_lst`` (fewer if the list is
	        shorter) joined with a newline.
	    """
	    # TODO: Automatically select most relevant criterion.
	    # Placeholder: no relevance ranking yet, just take the leading entries.
	    leading_entries = card_lst[:2]
	    return '\n'.join(leading_entries)


	def sample_card(dataset='', topic='', model='', card_cnt=2):
	    """Load one randomly chosen model card from disk and format it.

	    A card index is drawn uniformly from ``0 .. card_cnt - 1`` and the file
	    ``dataset/{dataset}/cards/{topic}/{topic}_{model}_{index}.jsonl`` is
	    read relative to the current working directory.

	    Args:
	        dataset: Dataset directory name.
	        topic: Topic directory name, also used as the file-name prefix.
	        model: Model name embedded in the file name.
	        card_cnt: Number of card files to choose between.

	    Returns:
	        The list of formatted entry strings from ``format_card_str``.

	    Raises:
	        ValueError: If ``card_cnt`` is less than 1 (from ``random.randint``).
	        FileNotFoundError: If the selected card file does not exist.
	        json.JSONDecodeError: If the file is not a single valid JSON document.
	    """
	    card_index = random.randint(0, card_cnt - 1)
	    path = f'dataset/{dataset}/cards/{topic}/{topic}_{model}_{card_index}.jsonl'

	    # Despite the .jsonl extension the file is parsed as ONE JSON document.
	    # The encoding is pinned to UTF-8 so decoding does not depend on the
	    # platform's locale default.
	    with open(path, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    return format_card_str(data)


	def sample_QA_entry(dataset='', topic='', model='', n=1):
	    """Pick one random test question that was recorded for ``model``.

	    Reads ``dataset/{dataset}/{topic}/{topic}_test.jsonl`` (one JSON object
	    per line, relative to the current working directory), keeps the rows
	    whose 'model' field equals ``model`` and samples one of them.

	    Args:
	        dataset: Dataset directory name.
	        topic: Topic directory name, also used as the file-name prefix.
	        model: Model name to filter the rows by.
	        n: Accepted for interface compatibility; exactly one row is
	            always sampled.

	    Returns:
	        A ``(sample, sample_idx)`` tuple: the sampled row as a dict and its
	        index label in the unfiltered table (i.e. its position among the
	        non-blank lines of the file).

	    Raises:
	        FileNotFoundError: If the test file does not exist.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	        KeyError: If the loaded rows have no 'model' column (this includes
	            a file without any rows).
	        ValueError: If no row belongs to ``model``.
	    """
	    path = f'dataset/{dataset}/{topic}/{topic}_test.jsonl'

	    # Stream the file line by line; blank lines (e.g. a stray empty line at
	    # the end of the file) are skipped instead of crashing json.loads.
	    with open(path, 'r', encoding='utf-8') as f:
	        records = [json.loads(line) for line in f if line.strip()]

	    df = pd.DataFrame(records)

	    # Keep only the rows produced by the requested model.
	    matches = df[df['model'] == model]
	    if matches.empty:
	        # pandas would raise a ValueError here as well, but with a message
	        # that does not say which model or file was involved.
	        raise ValueError(f'No test entries for model {model!r} in {path}')

	    picked = matches.sample(1)
	    sample_idx = picked.index[0]
	    sample = picked.to_dict(orient='records')[0]
	    return sample, sample_idx

	if __name__ == "__main__":
	    # Manual smoke run: sample one random entry; the result is discarded.
	    sample_random_entry(n=5)