Spaces:

loveblairsky
/

LLM-model-cards

Runtime error

File size: 3,284 Bytes

import random
import numpy as np
import os
import json
from Config import *
import pandas as pd

def format_card_str(card):
    """Render each criterion of a model card as a Markdown bullet string.

    A string value becomes a single ``- key: value`` line. A dict value
    becomes an overview line followed by indented thinking-pattern, strength
    and weakness lines. A dict whose three detail fields concatenate to the
    empty string is left out of the result altogether.

    Args:
        card: Mapping of criterion name to either a string or a dict with the
            keys ``overview``, ``thinking_pattern``, ``strength`` and
            ``weakness``.

    Returns:
        List of formatted entries, one per kept criterion, in card order.
        Every entry ends with a newline.

    Raises:
        ValueError: If a value is neither a string nor a dict.
    """
    formatted = []
    for name, value in card.items():
        if isinstance(value, str):
            formatted.append(f'- {name}: {value}\n')
            continue

        if not isinstance(value, dict):
            raise ValueError(f'Unknown type: {type(value)}')

        header = f"- {name}: {value['overview']}\n"
        details = value['thinking_pattern'] + value['strength'] + value['weakness']
        if details == '':
            # Nothing beyond the overview: the criterion is dropped entirely.
            continue

        formatted.append(
            header
            + f"    - Thinking Patterns: {value['thinking_pattern']}\n"
            + f"    - Strength: {value['strength']}\n"
            + f"    - Weakness: {value['weakness']}\n"
        )
    return formatted

def format_qa_entry(qa):
    """Build the display text for one multiple-choice question.

    The text is the question, a blank line, the choices labelled ``A.``,
    ``B.``, ... one per line, another blank line, and a ``Ground Truth``
    line giving the letter of the correct choice.

    Args:
        qa: Mapping with the keys ``question`` (str), ``choices`` (sequence
            of choice texts) and ``ground truth`` (0-based index of the
            correct choice).

    Returns:
        The formatted question, choices and ground-truth letter as one string.
    """
    question = qa['question']
    choices = qa['choices']
    answer_idx = qa['ground truth']

    # Choices are positional (0..n-1); 65 is ord('A'), so position 0 -> 'A'.
    lettered = [f"{chr(65 + pos)}. {text}" for pos, text in enumerate(choices)]
    choice_block = '\n'.join(lettered)

    body = question + '\n\n' + choice_block
    return body + '\n\n' + f'Ground Truth: {chr(65 + answer_idx)}'


def sample_random_entry(dataset='', topic='', model='', n=1):
    """Sample one card and one QA entry and prepare them for display.

    Any of ``dataset``, ``topic`` and ``model`` left as the empty string is
    drawn at random from ``DATASETS``, ``TOPICS[dataset]`` and ``MODELS``
    respectively.

    Args:
        dataset: Dataset name, or '' to pick one at random.
        topic: Topic name within the dataset, or '' to pick one at random.
        model: Model name, or '' to pick one at random.
        n: Accepted but not used by this function.

    Returns:
        Tuple ``(display_dict, info_dict)`` as produced by
        ``process_for_display``, with ``info_dict['index']`` set to the index
        returned by ``sample_QA_entry``.
    """
    # Resolve in this order: the topic pool depends on the chosen dataset.
    dataset = random.choice(DATASETS) if dataset == '' else dataset
    topic = random.choice(TOPICS[dataset]) if topic == '' else topic
    model = random.choice(MODELS) if model == '' else model

    cards = sample_card(dataset, topic, model)
    qa, row_index = sample_QA_entry(dataset, topic, model)

    display_dict, info_dict = process_for_display(cards, qa)
    info_dict['index'] = row_index
    return display_dict, info_dict


def process_for_display(card_lst, qa):
    """Split a sampled card and QA record into display text and metadata.

    Args:
        card_lst: Formatted card entries, passed through to ``select_entry``.
        qa: QA record; must contain ``question`` and ``choices``.

    Returns:
        Tuple ``(display_dict, info_dict)``. ``display_dict`` has the keys
        ``card`` (selected card text) and ``qa`` (formatted question text).
        ``info_dict`` is a shallow copy of ``qa`` without ``question`` and
        ``choices``.
    """
    qa_text = format_qa_entry(qa)
    display_dict = {
        'card': select_entry(qa_text, card_lst),
        'qa': qa_text,
    }

    # Copy so the caller's record is not mutated, then drop the fields that
    # are already part of the formatted question text.
    info_dict = {**qa}
    for shown_key in ('question', 'choices'):
        info_dict.pop(shown_key)

    return display_dict, info_dict
    


def select_entry(qa_entry, card_lst):
    """Return the card text to show alongside a question.

    Placeholder implementation: ``qa_entry`` is ignored and the first two
    entries of ``card_lst`` are joined with a newline.

    TODO: Automatically select the most relevant criterion.
    """
    leading_entries = card_lst[:2]
    return '\n'.join(leading_entries)


def sample_card(dataset='', topic='', model='', card_cnt=2):
    """Load one randomly chosen card for a model and format it.

    Picks a card index uniformly from ``0 .. card_cnt - 1``, reads
    ``dataset/<dataset>/cards/<topic>/<topic>_<model>_<index>.jsonl``
    (relative to the working directory) and passes the parsed content to
    ``format_card_str``.

    Args:
        dataset: Dataset directory name.
        topic: Topic name, used both as directory and file-name prefix.
        model: Model name embedded in the file name.
        card_cnt: Number of card files available per model.

    Returns:
        The result of ``format_card_str`` for the loaded card.
    """
    card_index = random.randint(0, card_cnt - 1)
    path = f'dataset/{dataset}/cards/{topic}/{topic}_{model}_{card_index}.jsonl'

    # Despite the .jsonl suffix the file is parsed as a single JSON document.
    # JSON is UTF-8 by specification, so do not rely on the locale's default
    # encoding when opening it.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    return format_card_str(data)
   

def sample_QA_entry(dataset='', topic='', model='', n=1):
    """Sample one QA record for a model from a topic's test file.

    Reads ``dataset/<dataset>/<topic>/<topic>_test.jsonl`` (relative to the
    working directory) as JSON Lines, keeps the records whose ``model`` field
    equals ``model`` and draws one of them at random.

    Args:
        dataset: Dataset directory name.
        topic: Topic name, used both as directory and file-name prefix.
        model: Value the record's ``model`` field must equal.
        n: Accepted but not used; exactly one record is sampled.

    Returns:
        Tuple ``(sample, sample_idx)`` where ``sample`` is the record as a
        dict and ``sample_idx`` is its 0-based position among the parsed
        (non-blank) records of the file.

    Raises:
        ValueError: If no record in the file belongs to ``model``.
    """
    path = f'dataset/{dataset}/{topic}/{topic}_test.jsonl'

    # Stream the file line by line. Blank lines (e.g. a trailing newline at
    # the end of the file) are skipped instead of crashing json.loads.
    with open(path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]

    df = pd.DataFrame(data)

    # Keep only the rows produced by the requested model; the original row
    # index is preserved so it can be reported back to the caller.
    df = df[df['model'] == model]
    if df.empty:
        raise ValueError(f'No entries for model {model!r} in {path}')

    sample = df.sample(1)
    sample_idx = sample.index[0]
    return sample.to_dict(orient='records')[0], sample_idx

if __name__ == "__main__":
    # Manual smoke run: sample a single random entry; the result is discarded.
    sample_random_entry(n=5)