|
import gradio as gr |
|
import pandas as pd |
|
import random |
|
import firebase_admin |
|
from firebase_admin import credentials |
|
from firebase_admin import firestore |
|
from trueskill import Rating |
|
import trueskill |
|
|
|
# Local CSV with (question, model, answer) rows, used by list_models/list_questions.
CSV_FILE_PATH = "qa_pairs.csv"

# Initialise the Firebase Admin SDK once, from the local service-account key.
firebase_admin.initialize_app(credentials.Certificate("unlpboard_f.json"))
|
|
|
def list_models():
    """Return the distinct model names found in the local QA CSV file."""
    qa_frame = pd.read_csv(CSV_FILE_PATH)
    return list(qa_frame['model'].unique())
|
|
|
|
|
def list_questions():
    """Return the distinct question strings found in the local QA CSV file."""
    qa_frame = pd.read_csv(CSV_FILE_PATH)
    return list(qa_frame['question'].unique())
|
|
|
def fetch_questions():
    """Return every document of the Firestore 'questions' collection as a dict.

    NOTE(review): relies on the module-level Firestore client ``db``, which is
    created further down the file — this works only because the function is
    called after that assignment runs.
    """
    return [snapshot.to_dict() for snapshot in db.collection('questions').stream()]
|
|
|
|
|
def display_answers(question, model1, model2, df):
    """Look up each model's stored answer for *question* in *df*.

    *df* must have 'question', 'model' and 'answer' columns.  Returns a pair
    of markdown strings (model1's answer, model2's answer); a placeholder
    message is returned for a model with no matching row.
    """
    answers = {
        model1: "No answer available for Model 1",
        model2: "No answer available for Model 2",
    }
    for current in (model1, model2):
        rows = df[(df['question'] == question) & (df['model'] == current)]
        if not rows.empty:
            answers[current] = f"**Answer:**\n{rows['answer'].iloc[0]}"
    return answers[model1], answers[model2]
|
|
|
|
|
def update_b(q, m1, a1, m2, a2):
    """START handler: load the first real matchup and switch voting on.

    Returns the refreshed (question, model1, answer1, model2, answer2)
    components, the three vote buttons enabled, and the START button hidden.
    """
    # BUG FIX: both debug lines printed random_model2; the first now prints
    # random_model1.  NOTE(review): these module-level globals are never
    # reassigned by update_symbols, so they only show the initial placeholder
    # state — confirm whether this trace is still wanted.
    print('Model1: ', random_model1)
    print('Model2: ', random_model2)
    q, m1, a1, m2, a2 = update_symbols(q, m1, a1, m2, a2)
    b1 = gr.Button("Vote for Model 1", interactive=True)
    b2 = gr.Button("It’s a tie!", interactive=True)
    b3 = gr.Button("Vote for Model 2", interactive=True)
    b4 = gr.Button("START!", visible=False)
    return q, m1, a1, m2, a2, b1, b2, b3, b4
|
|
|
|
|
def update_symbols1(q, m1, a1, m2, a2):
    """Record a vote for model 1 (left side): log the vote, update win/loss
    counts and TrueSkill ratings in Firestore, then load the next matchup.
    """
    print("Voted for Model 1")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m1
    )
    votes_ref = db.collection('votes')
    winner_doc = votes_ref.document(m1).get()
    loser_doc = votes_ref.document(m2).get()

    # BUG FIX: the original read 'elo_rating' before checking that the
    # document exists, which raises KeyError for a missing document or field.
    # Fall back to TrueSkill's default mu (25) instead.
    winner_mu = winner_doc.to_dict().get('elo_rating', 25) if winner_doc.exists else 25
    loser_mu = loser_doc.to_dict().get('elo_rating', 25) if loser_doc.exists else 25

    # rate_1vs1(winner, loser) -> (new_winner_rating, new_loser_rating).
    # NOTE(review): only mu is persisted, so sigma resets to its default on
    # every match — confirm this is intended.
    new_winner, new_loser = trueskill.rate_1vs1(Rating(winner_mu), Rating(loser_mu))

    if winner_doc.exists:
        votes_ref.document(m1).update({'win_count': firestore.Increment(1),
                                       'elo_rating': new_winner.mu})
    else:
        votes_ref.document(m1).set({'win_count': 1, 'elo_rating': new_winner.mu})

    if loser_doc.exists:
        votes_ref.document(m2).update({'loss_count': firestore.Increment(1),
                                       'elo_rating': new_loser.mu})
    else:
        votes_ref.document(m2).set({'loss_count': 1, 'elo_rating': new_loser.mu})

    return update_symbols(q, m1, a1, m2, a2)
|
|
|
|
|
def update_symbols2(q, m1, a1, m2, a2):
    """Record a tie vote: log it and load the next matchup.

    Ties do not touch win/loss counts or ratings.
    """
    print("Voted for Spare")
    log_vote(m1, m2, q, a1, a2, 'tie')
    return update_symbols(q, m1, a1, m2, a2)
|
|
|
def update_symbols3(q, m1, a1, m2, a2):
    """Record a vote for model 2 (right side): log the vote, update win/loss
    counts and TrueSkill ratings in Firestore, then load the next matchup.
    """
    print("Voted for Model 2")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m2
    )
    votes_ref = db.collection('votes')
    winner_doc = votes_ref.document(m2).get()
    loser_doc = votes_ref.document(m1).get()

    # BUG FIX (2 issues):
    #  1. 'elo_rating' was read before checking the document exists, which
    #     raises KeyError on a missing document/field; default to mu=25.
    #  2. The original did `elo1, elo2 = trueskill.rate_1vs1(elo2, elo1)`,
    #     which assigned the winner's new rating to the LOSING model and
    #     vice versa.  rate_1vs1(winner, loser) returns (winner, loser).
    winner_mu = winner_doc.to_dict().get('elo_rating', 25) if winner_doc.exists else 25
    loser_mu = loser_doc.to_dict().get('elo_rating', 25) if loser_doc.exists else 25
    new_winner, new_loser = trueskill.rate_1vs1(Rating(winner_mu), Rating(loser_mu))

    if winner_doc.exists:
        votes_ref.document(m2).update({'win_count': firestore.Increment(1),
                                       'elo_rating': new_winner.mu})
    else:
        votes_ref.document(m2).set({'win_count': 1, 'elo_rating': new_winner.mu})

    if loser_doc.exists:
        votes_ref.document(m1).update({'loss_count': firestore.Increment(1),
                                       'elo_rating': new_loser.mu})
    else:
        votes_ref.document(m1).set({'loss_count': 1, 'elo_rating': new_loser.mu})

    return update_symbols(q, m1, a1, m2, a2)
|
|
|
def update_symbols(q, m1, a1, m2, a2):
    """Pick a fresh random question plus two distinct random models and
    return the updated Gradio components (model names stay hidden).
    """
    next_question = random.choice(questions)
    left_model, right_model = random.sample(models, 2)
    left_answer, right_answer = display_answers(next_question, left_model, right_model, combined_df)
    m1 = gr.Markdown(f"{left_model}", visible=False)
    a1 = gr.Markdown(left_answer)
    q = gr.Markdown(f"{next_question}")
    m2 = gr.Markdown(f"{right_model}", visible=False)
    a2 = gr.Markdown(right_answer)
    return q, m1, a1, m2, a2
|
|
|
def update_total_votes():
    """Increment the global vote counter stored in votes/total."""
    total_ref = db.collection('votes').document('total')
    if total_ref.get().exists:
        total_ref.update({'count': firestore.Increment(1)})
    else:
        total_ref.set({'count': 1})
|
|
|
def log_vote(model1, model2, question, output1, output2, outcome):
    """Append one pairwise-vote record to the 'votes_log' collection.

    *outcome* is the winning model's name, or 'tie'.  The timestamp is set
    server-side by Firestore.
    """
    record = {
        'model1': model1,
        'model2': model2,
        'question': question,
        'output1': output1,
        'output2': output2,
        'outcome': outcome,
        'timestamp': firestore.SERVER_TIMESTAMP,
    }
    db.collection('votes_log').add(record)
|
|
|
|
|
def fetch_and_format_leaderboard():
    """Build the leaderboard DataFrame from the Firestore 'votes' collection.

    Returns a DataFrame with columns Rank, model, win_rate, TrueSkill rating,
    sorted by win rate (descending).

    NOTE(review): every document in 'votes' is included, so a 'total' counter
    document (written by update_total_votes) would show up as a row — confirm
    that collection only holds per-model documents.
    """
    leaderboard = []
    for doc in db.collection('votes').stream():
        data = doc.to_dict()
        wins = data.get('win_count', 0)
        losses = data.get('loss_count', 0)
        matches = wins + losses
        leaderboard.append({
            "model": doc.id,
            "win_rate": (wins / matches) * 100 if matches > 0 else 0,
            "TrueSkill rating": data.get('elo_rating', 0),
        })

    leaderboard.sort(key=lambda row: row['win_rate'], reverse=True)

    # Passing columns= keeps the frame well-formed even when no docs exist.
    leaderboard_df = pd.DataFrame(leaderboard, columns=["model", "win_rate", "TrueSkill rating"])
    # BUG FIX: rank was hard-coded as [1,2,3,4,5,6], which raises ValueError
    # whenever the collection does not contain exactly six documents.
    leaderboard_df.insert(0, 'Rank', range(1, len(leaderboard_df) + 1))
    return leaderboard_df
|
|
|
|
|
|
|
|
|
|
|
# Module-level Firestore client shared by every handler above and below.
db = firestore.client()
|
|
|
def fetch_questions_c(collection):
    """Return every document of the named Firestore *collection* as a dict."""
    snapshots = db.collection(collection).stream()
    return [snapshot.to_dict() for snapshot in snapshots]
|
|
|
# One Firestore collection per model; build a single combined QA frame.
models = ['codekobzar', 'gpt-4', 'llama-2-70b-chat', 'sherlock-no-rag', 'sherlock-rag', 'ukrainenow']

frames = []
for model_name in models:
    frame = pd.DataFrame(fetch_questions_c(model_name))
    frame['model'] = model_name
    frames.append(frame)

combined_df = pd.concat(frames, ignore_index=True)
combined_df.drop('input', axis=1, inplace=True)
combined_df.rename(columns={'instruction': 'question', 'output': 'answer'}, inplace=True)
|
|
|
# Seed a vote document for each model the first time the app runs.
# 25 is TrueSkill's default mu.
votes_ref = db.collection('votes')
for model in models:
    vote_doc = votes_ref.document(model).get()
    if vote_doc.exists:
        print("-------")
    else:
        # BUG FIX: the original issued three consecutive set() calls; each
        # set() REPLACES the whole document, so only {'elo_rating': 25}
        # survived and win_count/loss_count were never initialised.
        votes_ref.document(model).set({'win_count': 0, 'loss_count': 0, 'elo_rating': 25})
|
|
|
|
|
|
|
# Placeholder state shown before the user presses START ('1'/'2' are dummy
# model names, so display_answers yields the "No answer available" messages).
random_question = 'Click any button to start!'
random_model1, random_model2 = '1', '2'
answer1, answer2 = display_answers(random_question, random_model1, random_model2, combined_df)

# Pool of question texts the arena samples from.
questions = [entry['question_text'] for entry in fetch_questions()]

votes_ref = db.collection('votes')
|
|
|
|
|
def create_app():
    """Build the Gradio Blocks UI: a question, two anonymous model answers,
    vote/tie buttons, a START button, and a refreshable leaderboard table."""

    # Debug trace of the placeholder state shown before START is pressed.
    print('-----------------------')
    print(random_question)
    print(random_model1)
    print('-----!!!!!!!!!!!!!')

    with gr.Blocks() as app:
        q = gr.Markdown(f"### Question: {random_question}")

        with gr.Row():
            with gr.Column():
                # Model names stay hidden so the vote is blind.
                m1 = gr.Markdown(f"{random_model1}", visible=False)
                a1 = gr.Markdown(answer1)

            with gr.Column():
                m2 = gr.Markdown(f"{random_model2}", visible=False)
                a2 = gr.Markdown(answer2)

        with gr.Row():
            # Voting is disabled until START reveals a real matchup.
            b1 = gr.Button("Vote for Model 1",interactive=False)
            b2 = gr.Button("It’s a tie!",interactive=False)
            b3 = gr.Button("Vote for Model 2",interactive=False)
        with gr.Row():
            b4 = gr.Button("START!", interactive=True)

        # Leaderboard snapshot computed once at app-build time.
        initial_leaderboard_data = fetch_and_format_leaderboard()

        leaderboard_display = gr.Dataframe(value=initial_leaderboard_data, label="Leaderboard")

        # START loads a matchup, enables voting and hides itself; each vote
        # button records the outcome and swaps in the next matchup.
        b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])
        b1.click(update_symbols1, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2])
        b2.click(update_symbols2, inputs=[q, m1, a1, m2, a2], outputs=[q, m1, a1, m2, a2])
        b3.click(update_symbols3, inputs=[q, m1, a1, m2, a2], outputs=[q, m1, a1, m2, a2])
        leaderboard_button = gr.Button("Refresh Leaderboard")
        leaderboard_button.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)

    return app
|
|
|
# Build and launch the UI.  NOTE(review): this runs on import (no
# `if __name__ == "__main__":` guard) — confirm the module is only ever
# executed as a script.
app = create_app()
app.launch()
|
|