LLM-model-cards / utils.py
Blair Yang
Revise config
195be44
raw
history blame
3.08 kB
import json
import os
import random
from typing import Tuple, Dict
import jsonlines
from Config import *
from models import select_model
def read_all(path: str) -> str:
    """
    Returns the entire contents of the text file at `path`.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Args:
        path: Location of the file to read.

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
class Card:
    """
    A card loaded from a JSON file that can be rendered as a markdown list.

    Each top-level value of the JSON object must be either a string or a
    dict with the keys 'overview', 'thinking_pattern', 'strength' and
    'weakness'.
    """
    # Parsed content of the card's JSON file.
    json_obj: Dict

    def __init__(self, path: str):
        """
        Loads the card from the JSON file at `path`.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # A context manager closes the handle deterministically; decoding is
        # pinned to UTF-8 so loading does not depend on the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            self.json_obj = json.load(f)

    def get_markdown_str(self) -> str:
        """
        Returns the card as a markdown bullet list, one top-level bullet per key.

        String values are rendered inline. Dict values render their
        'overview' inline, followed by sub-bullets for thinking pattern,
        strength and weakness unless all three are empty strings.

        Raises:
            ValueError: If a value is neither a string nor a dict.
        """
        parts = []
        for k, v in self.json_obj.items():
            if isinstance(v, str):
                parts.append(f'- {k}: {v}\n')
            elif isinstance(v, dict):
                parts.append(f"- {k}: {v['overview']}\n")
                # Skip the sub-bullets when there is no detail to show.
                if v['thinking_pattern'] + v['strength'] + v['weakness'] == '':
                    continue
                parts.append(f" - Thinking Patterns: {v['thinking_pattern']}\n")
                parts.append(f" - Strength: {v['strength']}\n")
                parts.append(f" - Weakness: {v['weakness']}\n")
            else:
                raise ValueError(f'Unknown type: {type(v)}')
        return ''.join(parts)

    def __str__(self):
        """Returns the markdown rendering of the card."""
        return self.get_markdown_str()
def sample_random_card(dataset: str, topic: str, model: str) -> Tuple[Card, str]:
    """
    Returns a random card and the file name of the card.

    Candidates are the `.json` files in `{CARD_DIR}/{dataset}/{topic}` whose
    names start with `model`; one of them is picked with `random.choice`.

    Args:
        dataset: Name of the dataset sub-directory under CARD_DIR.
        topic: Name of the topic sub-directory under the dataset.
        model: File-name prefix identifying the model's cards.

    Returns:
        A tuple of the loaded Card and the chosen file name (not the full path).

    Raises:
        AssertionError: If no matching card file exists.
        OSError: If the cards directory cannot be listed.
    """
    cards_dir = f"{CARD_DIR}/{dataset}/{topic}"
    prefix = f"{model}"
    # list all .json files start with prefix in cards_dir
    # NOTE(review): prefix matching also accepts files of any model whose
    # name merely begins with `model` - confirm the file naming scheme.
    files = [f for f in os.listdir(cards_dir)
             if f.startswith(prefix) and f.endswith(".json")]
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`; AssertionError is kept so callers see the same exception type.
    if not files:
        raise AssertionError(f"No card found for {dataset} - {topic} - {model}")
    # randomly select a file
    card_file = random.choice(files)
    card_path = os.path.join(cards_dir, card_file)
    return Card(card_path), card_file
def format_qa_entry(qa: Dict) -> str:
    """
    Renders a multiple-choice QA record as display text.

    The output is the question, a blank line, one lettered line per choice
    ("A. ...", "B. ...", ...), a blank line, and a "Ground Truth: <letter>"
    line.

    Args:
        qa: Record with the keys 'question', 'choices' (a sequence) and
            'ground truth' (the 0-based index of the correct choice).

    Returns:
        The formatted text.
    """
    prompt_text = qa['question']
    options = qa['choices']
    answer_index = qa['ground truth']
    # Indices 0..n map onto the letters A..Z through their character codes.
    lettered = [f"{chr(65 + idx)}. {option}" for idx, option in enumerate(options)]
    options_block = '\n'.join(lettered)
    answer_line = f'Ground Truth: {chr(65 + answer_index)}'
    return '\n\n'.join([prompt_text, options_block, answer_line])
def sample_random_qa(dataset: str, topic: str, model: str) -> Tuple[str, str, bool]:
    """
    Picks one random QA record of a model and packages it for display.

    Records are read from `{DATASET_DIR}/{dataset}/{topic}/{model}_test.jsonl`.

    Returns:
        A tuple of the formatted QA text, the model's completion followed by
        a "Choice: <letter>" line, and whether the record's 'verdict' equals
        its 'ground truth'.
    """
    source_file = f"{DATASET_DIR}/{dataset}/{topic}/{model}_test.jsonl"
    with jsonlines.open(source_file) as reader:
        records = list(reader)

    record = random.choice(records)
    formatted_qa = format_qa_entry(record)

    reasoning = record["completion"]
    # The verdict is an index; 65 is the character code of 'A'.
    chosen_letter = chr(65 + record["verdict"])
    answer_text = "\n\n".join((reasoning, f"Choice: {chosen_letter}"))

    is_correct = record["ground truth"] == record["verdict"]
    return formatted_qa, answer_text, is_correct
def summarize_card(summarizer: str, card: Card, qa: str) -> str:
    """
    Runs the summarizer model on a card together with a QA text.

    The system prompt is read from prompts/summarize/system.txt. The user
    prompt is the template in prompts/summarize/user.txt with its `card`
    and `qa` fields filled by the card's string form and `qa`.

    Args:
        summarizer: Identifier passed to `select_model`.
        card: Card whose string form is inserted into the user prompt.
        qa: QA text inserted into the user prompt.

    Returns:
        Whatever the selected model returns for the user prompt.
    """
    system_text = read_all("prompts/summarize/system.txt")
    user_template = read_all("prompts/summarize/user.txt")
    filled_prompt = user_template.format(card=str(card), qa=qa)

    summarizer_model = select_model(summarizer, system_text)
    # presumably cache=True lets the model reuse a stored response - confirm in models
    return summarizer_model(filled_prompt, cache=True)