ObjectRelator / LLaVA /llava /eval /generate_webpage_data_from_table.py

Upload folder using huggingface_hub

fe6c2e4 verified 3 months ago

4.09 kB

	"""Generate json file for webpage."""
	import json
	import os
	import re

	# models = ['llama', 'alpaca', 'gpt35', 'bard']
	models = ['vicuna']


	def read_jsonl(path: str, key: str=None):
	data = []
	with open(os.path.expanduser(path)) as f:
	for line in f:
	if not line:
	continue
	data.append(json.loads(line))
	if key is not None:
	data.sort(key=lambda x: x[key])
	data = {item[key]: item for item in data}
	return data


	def trim_hanging_lines(s: str, n: int) -> str:
	s = s.strip()
	for _ in range(n):
	s = s.split('\n', 1)[1].strip()
	return s


	if __name__ == '__main__':
	questions = read_jsonl('table/question.jsonl', key='question_id')

	# alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id')
	# bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id')
	# gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id')
	# llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id')
	vicuna_answers = read_jsonl('table/answer/answer_vicuna-13b.jsonl', key='question_id')
	ours_answers = read_jsonl('table/results/llama-13b-hf-alpaca.jsonl', key='question_id')

	review_vicuna = read_jsonl('table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl', key='question_id')
	# review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id')
	# review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id')
	# review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id')
	# review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id')

	records = []
	for qid in questions.keys():
	r = {
	'id': qid,
	'category': questions[qid]['category'],
	'question': questions[qid]['text'],
	'answers': {
	# 'alpaca': alpaca_answers[qid]['text'],
	# 'llama': llama_answers[qid]['text'],
	# 'bard': bard_answers[qid]['text'],
	# 'gpt35': gpt35_answers[qid]['text'],
	'vicuna': vicuna_answers[qid]['text'],
	'ours': ours_answers[qid]['text'],
	},
	'evaluations': {
	# 'alpaca': review_alpaca[qid]['text'],
	# 'llama': review_llama[qid]['text'],
	# 'bard': review_bard[qid]['text'],
	'vicuna': review_vicuna[qid]['content'],
	# 'gpt35': review_gpt35[qid]['text'],
	},
	'scores': {
	'vicuna': review_vicuna[qid]['tuple'],
	# 'alpaca': review_alpaca[qid]['score'],
	# 'llama': review_llama[qid]['score'],
	# 'bard': review_bard[qid]['score'],
	# 'gpt35': review_gpt35[qid]['score'],
	},
	}

	# cleanup data
	cleaned_evals = {}
	for k, v in r['evaluations'].items():
	v = v.strip()
	lines = v.split('\n')
	# trim the first line if it's a pair of numbers
	if re.match(r'\d+[, ]+\d+', lines[0]):
	lines = lines[1:]
	v = '\n'.join(lines)
	cleaned_evals[k] = v.replace('Assistant 1', "Assistant 1").replace('Assistant 2', 'Assistant 2')

	r['evaluations'] = cleaned_evals
	records.append(r)

	# Reorder the records, this is optional
	for r in records:
	if r['id'] <= 20:
	r['id'] += 60
	else:
	r['id'] -= 20
	for r in records:
	if r['id'] <= 50:
	r['id'] += 10
	elif 50 < r['id'] <= 60:
	r['id'] -= 50
	for r in records:
	if r['id'] == 7:
	r['id'] = 1
	elif r['id'] < 7:
	r['id'] += 1

	records.sort(key=lambda x: x['id'])

	# Write to file
	with open('webpage/data.json', 'w') as f:
	json.dump({'questions': records, 'models': models}, f, indent=2)