# Page-header residue captured together with this file (not part of the program):
# Spaces status: Runtime error; file size: 3,086 bytes; revision abedf13.
import jsonlines
import random
import numpy as np
import os
import json
from Config import *
import pandas as pd
def format_card_str(card: dict) -> list:
    """Render the entries of a card as Markdown-style bullet strings.

    A string value becomes a single ``- key: value`` line. A dict value
    becomes ``- key: <overview>`` followed by one sub-line each for its
    ``thinking_pattern``, ``strength`` and ``weakness`` fields.

    Args:
        card: Mapping from an entry name to either a string or a dict holding
            the keys ``overview``, ``thinking_pattern``, ``strength`` and
            ``weakness``.

    Returns:
        A list with one formatted, newline-terminated string per kept entry,
        in the iteration order of ``card``.

    Raises:
        ValueError: If a value is neither a ``str`` nor a ``dict``.
        KeyError: If a dict value lacks one of the expected keys.
    """
    entries = []
    for k, v in card.items():
        if isinstance(v, str):
            r = f'- {k}: {v}\n'
        elif isinstance(v, dict):
            r = f"- {k}: {v['overview']}\n"
            # A dict entry whose three detail fields are all empty is dropped
            # entirely (its overview line is discarded as well).
            if v['thinking_pattern'] + v['strength'] + v['weakness'] == '':
                continue
            r += f" - Thinking Patterns: {v['thinking_pattern']}\n"
            r += f" - Strength: {v['strength']}\n"
            r += f" - Weakness: {v['weakness']}\n"
        else:
            raise ValueError(f'Unknown type: {type(v)}')
        entries.append(r)
    return entries
def format_qa_entry(qa: dict) -> str:
    """Format a multiple-choice QA record as display text.

    The result is the question, a blank line, the choices labelled
    ``A.``, ``B.``, ... (one per line), a blank line, and a
    ``Ground Truth: <letter>`` line.

    Args:
        qa: Record with the keys ``question`` (str), ``choices`` (sequence of
            choice texts) and ``ground truth`` (0-based integer index of the
            correct choice; note the space in the key name).

    Returns:
        The formatted text, without a trailing newline.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    question = qa['question']
    choices = qa['choices']
    ground_truth = qa['ground truth']
    # Choice indices 0..n map to the letters A..Z (chr(65) == 'A').
    choice_str = '\n'.join(f"{chr(65 + i)}. {c}" for i, c in enumerate(choices))
    return question + '\n\n' + choice_str + '\n\n' + f'Ground Truth: {chr(65 + ground_truth)}'
def sample_random_entry(dataset='', topic='', model='', n=1):
    """Sample one card and one QA entry and prepare them for display.

    Any selector left as the empty string is chosen at random:
    ``dataset`` from ``DATASETS``, ``topic`` from ``TOPICS[dataset]`` and
    ``model`` from ``MODELS`` (these names are presumably provided by the
    ``from Config import *`` at the top of the file -- confirm).

    Args:
        dataset: Dataset name, or ``''`` to pick one at random.
        topic: Topic name within the dataset, or ``''`` to pick one at random.
        model: Model name, or ``''`` to pick one at random.
        n: Accepted for interface compatibility; currently unused, a single
            entry is always sampled.

    Returns:
        The ``(display_dict, info_dict)`` pair produced by
        ``process_for_display``.
    """
    if dataset == '':
        dataset = random.choice(DATASETS)
    if topic == '':
        topic = random.choice(TOPICS[dataset])
    if model == '':
        model = random.choice(MODELS)
    card_lst = sample_card(dataset, topic, model)
    qa = sample_QA_entry(dataset, topic, model)
    display_dict, info_dict = process_for_display(card_lst, qa)
    return display_dict, info_dict
def process_for_display(card_lst, qa):
    """Build the display payload and the remaining metadata for a QA record.

    Args:
        card_lst: List of formatted card entry strings.
        qa: QA record; must contain ``question`` and ``choices`` plus whatever
            ``format_qa_entry`` requires.

    Returns:
        A ``(display_dict, info_dict)`` tuple. ``display_dict`` has the keys
        ``card`` (entries chosen by ``select_entry``) and ``qa`` (the formatted
        QA text). ``info_dict`` is a shallow copy of ``qa`` without the
        ``question`` and ``choices`` keys; ``qa`` itself is not modified.

    Raises:
        KeyError: If ``qa`` lacks ``question`` or ``choices``.
    """
    qa_entry = format_qa_entry(qa)
    display_dict = {
        'card': select_entry(qa_entry, card_lst),
        'qa': qa_entry,
    }
    # Copy first so the caller's record keeps its question and choices.
    info_dict = {**qa}
    info_dict.pop('question')
    info_dict.pop('choices')
    return display_dict, info_dict
def select_entry(qa_entry, card_lst):
    """Choose the card entries to show next to a QA entry.

    Placeholder implementation: ``qa_entry`` is ignored and the first two
    entries of ``card_lst`` (or fewer, if the list is shorter) are returned.

    Args:
        qa_entry: Formatted QA text; currently unused.
        card_lst: List of formatted card entry strings.

    Returns:
        The selected entries joined with a newline.
    """
    # TODO: Automatically select the most relevant criterion for qa_entry.
    return '\n'.join(card_lst[:2])
def sample_card(dataset='', topic='', model='', card_cnt=2):
    """Load one randomly chosen card file and format its entries.

    The file is read from
    ``dataset/<dataset>/cards/<topic>/<topic>_<model>_<index>.jsonl`` (relative
    to the current working directory), where ``index`` is drawn uniformly from
    ``0 .. card_cnt - 1``.

    Args:
        dataset: Dataset directory name.
        topic: Topic name.
        model: Model name.
        card_cnt: Number of card files available per (topic, model) pair.

    Returns:
        The list of formatted entry strings produced by ``format_card_str``.

    Raises:
        ValueError: If ``card_cnt`` is less than 1.
        OSError: If the card file cannot be opened.
    """
    card_index = random.randint(0, card_cnt - 1)
    path = f'dataset/{dataset}/cards/{topic}/{topic}_{model}_{card_index}.jsonl'
    # Despite the .jsonl extension the whole file is parsed as ONE JSON
    # document. An explicit UTF-8 encoding avoids depending on the platform's
    # default locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    card = format_card_str(data)
    return card
def sample_QA_entry(dataset='', topic='', model='', n=1):
    """Sample one QA record for a model from a topic's test set.

    Reads every record of ``dataset/<dataset>/<topic>/<topic>_test.jsonl``
    (relative to the current working directory), keeps those whose ``model``
    field equals ``model`` and returns one of them at random.

    Args:
        dataset: Dataset directory name.
        topic: Topic name.
        model: Value the ``model`` field of a record must equal.
        n: Accepted for interface compatibility; currently unused, exactly
            one record is always returned.

    Returns:
        The sampled record as a dict.

    Raises:
        ValueError: If no record matches ``model``.
        KeyError: If the records have no ``model`` field.
    """
    path = f'dataset/{dataset}/{topic}/{topic}_test.jsonl'
    with jsonlines.open(path) as reader:
        data = list(reader)
    df = pd.DataFrame(data)
    df = df[df['model'] == model]
    # DataFrame.sample raises an opaque ValueError on an empty frame; raise the
    # same exception type with a message that names the actual problem.
    if df.empty:
        raise ValueError(f'No QA entries found for model {model!r} in {path}')
    sample = df.sample(1)
    # to_dict(orient='records') yields a one-element list; unwrap it.
    return sample.to_dict(orient='records')[0]
# Smoke test when run as a script: sample one random entry (the result is discarded).
if __name__ == '__main__':
    sample_random_entry(n=5)