"""Generate json file for webpage.""" import json import os import re # models = ['llama', 'alpaca', 'gpt35', 'bard'] models = ['vicuna'] def read_jsonl(path: str, key: str=None): data = [] with open(os.path.expanduser(path)) as f: for line in f: if not line: continue data.append(json.loads(line)) if key is not None: data.sort(key=lambda x: x[key]) data = {item[key]: item for item in data} return data def trim_hanging_lines(s: str, n: int) -> str: s = s.strip() for _ in range(n): s = s.split('\n', 1)[1].strip() return s if __name__ == '__main__': questions = read_jsonl('table/question.jsonl', key='question_id') # alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id') # bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id') # gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id') # llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id') vicuna_answers = read_jsonl('table/answer/answer_vicuna-13b.jsonl', key='question_id') ours_answers = read_jsonl('table/results/llama-13b-hf-alpaca.jsonl', key='question_id') review_vicuna = read_jsonl('table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl', key='question_id') # review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id') # review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id') # review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id') # review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id') records = [] for qid in questions.keys(): r = { 'id': qid, 'category': questions[qid]['category'], 'question': questions[qid]['text'], 'answers': { # 'alpaca': alpaca_answers[qid]['text'], # 'llama': llama_answers[qid]['text'], # 'bard': bard_answers[qid]['text'], # 'gpt35': gpt35_answers[qid]['text'], 'vicuna': vicuna_answers[qid]['text'], 'ours': ours_answers[qid]['text'], }, 'evaluations': { # 'alpaca': review_alpaca[qid]['text'], # 'llama': review_llama[qid]['text'], # 'bard': review_bard[qid]['text'], 'vicuna': review_vicuna[qid]['content'], # 'gpt35': review_gpt35[qid]['text'], }, 'scores': { 'vicuna': review_vicuna[qid]['tuple'], # 'alpaca': review_alpaca[qid]['score'], # 'llama': review_llama[qid]['score'], # 'bard': review_bard[qid]['score'], # 'gpt35': review_gpt35[qid]['score'], }, } # cleanup data cleaned_evals = {} for k, v in r['evaluations'].items(): v = v.strip() lines = v.split('\n') # trim the first line if it's a pair of numbers if re.match(r'\d+[, ]+\d+', lines[0]): lines = lines[1:] v = '\n'.join(lines) cleaned_evals[k] = v.replace('Assistant 1', "**Assistant 1**").replace('Assistant 2', '**Assistant 2**') r['evaluations'] = cleaned_evals records.append(r) # Reorder the records, this is optional for r in records: if r['id'] <= 20: r['id'] += 60 else: r['id'] -= 20 for r in records: if r['id'] <= 50: r['id'] += 10 elif 50 < r['id'] <= 60: r['id'] -= 50 for r in records: if r['id'] == 7: r['id'] = 1 elif r['id'] < 7: r['id'] += 1 records.sort(key=lambda x: x['id']) # Write to file with open('webpage/data.json', 'w') as f: json.dump({'questions': records, 'models': models}, f, indent=2)