# Spaces: Runtime error
import random | |
import numpy as np | |
import os | |
import json | |
from Config import * | |
import pandas as pd | |
def format_card_str(card):
    """Render every item of ``card`` as a bullet-style text entry.

    A string value becomes a single ``- key: value`` line.  A dict value
    is rendered as its ``overview`` line followed by sub-bullets for
    ``thinking_pattern``, ``strength`` and ``weakness``; when those three
    fields concatenate to an empty string the item is skipped and
    produces no entry at all.

    Returns:
        A list of strings, one per rendered item, in the iteration order
        of ``card``.

    Raises:
        ValueError: if a value is neither a ``str`` nor a ``dict``.
    """
    rendered = []
    for name, value in card.items():
        if isinstance(value, str):
            rendered.append(f'- {name}: {value}\n')
            continue
        if not isinstance(value, dict):
            raise ValueError(f'Unknown type: {type(value)}')

        overview_line = f"- {name}: {value['overview']}\n"
        details = value['thinking_pattern'] + value['strength'] + value['weakness']
        if details == '':
            # Nothing beyond the overview: the whole item is dropped.
            continue
        rendered.append(
            overview_line
            + f" - Thinking Patterns: {value['thinking_pattern']}\n"
            + f" - Strength: {value['strength']}\n"
            + f" - Weakness: {value['weakness']}\n"
        )
    return rendered
def format_qa_entry(qa):
    """Build the display text for one multiple-choice QA record.

    Reads ``qa['question']``, ``qa['choices']`` and ``qa['ground truth']``.
    Choices are labelled ``A.``, ``B.``, ... by position, and the ground
    truth (a 0-based integer offset) is shown as the matching letter.

    Returns:
        The question, the lettered choices and a ``Ground Truth: X`` line,
        separated by blank lines.
    """
    question = qa['question']
    options = qa['choices']
    answer = qa['ground truth']

    # 65 is ord('A'): position 0 maps to 'A', 1 to 'B', and so on.
    lettered = '\n'.join(
        f"{chr(65 + pos)}. {text}" for pos, text in enumerate(options)
    )
    return question + '\n\n' + lettered + '\n\n' + f'Ground Truth: {chr(65 + answer)}'
def sample_random_entry(dataset='', topic='', model='', n=1):
    """Sample one card plus one QA record and prepare both for display.

    Any of ``dataset``, ``topic`` or ``model`` left as the empty string is
    drawn at random from ``DATASETS``, ``TOPICS[dataset]`` and ``MODELS``
    respectively (names not defined in this block; presumably supplied by
    the ``Config`` star import).  ``n`` is accepted but not used.

    Returns:
        ``(display_dict, info_dict)`` as produced by
        ``process_for_display``, with ``info_dict['index']`` set to the
        index of the sampled QA row.
    """
    # Draw order matters: every random.choice advances the shared RNG.
    dataset = random.choice(DATASETS) if dataset == '' else dataset
    topic = random.choice(TOPICS[dataset]) if topic == '' else topic
    model = random.choice(MODELS) if model == '' else model

    cards = sample_card(dataset, topic, model)
    qa_record, row_index = sample_QA_entry(dataset, topic, model)
    shown, details = process_for_display(cards, qa_record)
    details['index'] = row_index
    return shown, details
def process_for_display(card_lst, qa):
    """Split a sampled card/QA pair into display text and leftover metadata.

    Returns:
        ``(display_dict, info_dict)``.  ``display_dict`` holds the selected
        card text under ``'card'`` and the formatted QA text under
        ``'qa'``.  ``info_dict`` is a shallow copy of ``qa`` with the
        ``'question'`` and ``'choices'`` keys removed; ``qa`` itself is
        not modified.
    """
    formatted_qa = format_qa_entry(qa)
    display_dict = {
        'card': select_entry(formatted_qa, card_lst),
        'qa': formatted_qa,
    }

    # Everything except the fields already shown in the QA text.
    info_dict = dict(qa)
    for shown_key in ('question', 'choices'):
        del info_dict[shown_key]
    return display_dict, info_dict
def select_entry(qa_entry, card_lst):
    """Return the card text to show next to a QA entry.

    Placeholder: ``qa_entry`` is currently ignored and the first two
    entries of ``card_lst`` are joined with a newline.
    """
    # TODO: automatically select the most relevant criterion.
    leading_entries = card_lst[:2]
    return '\n'.join(leading_entries)
def sample_card(dataset='', topic='', model='', card_cnt=2):
    """Load one randomly chosen card file and format it as text entries.

    A card index is drawn uniformly from ``0 .. card_cnt - 1`` and the file
    ``dataset/<dataset>/cards/<topic>/<topic>_<model>_<index>.jsonl``
    (relative to the current working directory) is read.

    Returns:
        The list of strings produced by ``format_card_str`` for the
        loaded card.

    Raises:
        ValueError: from ``random.randint`` when ``card_cnt`` is below 1.
        OSError: if the card file cannot be opened.
        json.JSONDecodeError: if the file is not a single JSON document.
    """
    card_index = random.randint(0, card_cnt - 1)
    path = f'dataset/{dataset}/cards/{topic}/{topic}_{model}_{card_index}.jsonl'
    # Despite the .jsonl suffix, the file is parsed as ONE JSON document.
    # The encoding is pinned so decoding does not depend on the platform's
    # locale default.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return format_card_str(data)
def sample_QA_entry(dataset='', topic='', model='', n=1):
    """Sample one QA record for ``model`` from a topic's test file.

    Reads ``dataset/<dataset>/<topic>/<topic>_test.jsonl`` (relative to the
    current working directory) as JSON Lines, keeps the records whose
    ``'model'`` field equals ``model`` and picks one of them at random.
    ``n`` is accepted but not used; exactly one record is returned.

    Returns:
        ``(sample, sample_idx)``: the chosen record as a dict and its
        index label in the DataFrame built from the file, i.e. its
        0-based position among the non-blank lines.

    Raises:
        ValueError: if no record in the file matches ``model``.
        KeyError: if the records carry no ``'model'`` field (including an
            empty file).
        OSError: if the file cannot be opened.
    """
    path = f'dataset/{dataset}/{topic}/{topic}_test.jsonl'
    # Blank lines (e.g. a trailing newline at end of file) are skipped
    # rather than handed to json.loads, which would reject them.
    with open(path, 'r', encoding='utf-8') as f:
        records = [json.loads(line) for line in f if line.strip()]

    df = pd.DataFrame(records)
    matching = df[df['model'] == model]
    if matching.empty:
        # DataFrame.sample(1) on an empty frame raises a ValueError that
        # does not say what was missing; report the cause directly.
        raise ValueError(f'No QA entries for model {model!r} in {path}')

    picked = matching.sample(1)
    sample_idx = picked.index[0]
    sample = picked.to_dict(orient='records')[0]
    return sample, sample_idx
if __name__ == "__main__":
    # Smoke run when executed as a script; the returned pair is discarded.
    sample_random_entry(n=5)