Spaces:

arbml
/

cidar_human_eval

Sleeping

File size: 4,841 Bytes

import os
os.system("pip install pymongo")
import gradio as gr
import pandas as pd
import random
from collections import defaultdict
import json
import os 
import sys
from database import save_response

css = """
.rtl{
    text-align: right;
}
.selectize-dropdown, .selectize-input {
    direction: rtl !important;
}
"""

file_path = 'output/merged.json'
df = pd.read_json(file_path, orient='records', lines=False)

# that keeps track of how many times each question has been used
question_count = {index: 0 for index in df.index}
model_rankings = defaultdict(lambda: {'1st': 0, '2nd': 0, '3rd': 0})

def get_rank_suffix(rank):
    if 11 <= rank <= 13:
        return 'th'
    else:
        suffixes = {1: 'st', 2: 'nd', 3: 'rd'}
        return suffixes.get(rank % 10, 'th')

def process_rankings(user_rankings):
    print("Processing Rankings:", user_rankings)  # Debugging print
    for answer_id, rank in user_rankings:
        model = answer_id.split('_')[0]  # Extracting the model name from the answer_id
        rank_suffix = get_rank_suffix(rank)
        model_rankings[model][f'{rank}{rank_suffix}'] += 1  # Using the correct suffix based on the rank
        model_rankings_dict = dict(model_rankings)
    
    save_response(model_rankings_dict)
    # file_path = 'users_ranking.txt'
    # with open(file_path, 'a') as file:
    #     model_rankings_dict = dict(model_rankings)
    #     json.dump(model_rankings_dict, file)
    #     file.write('\n')  # Add a newline to separate entries
    print("Updated Model Rankings:", model_rankings)  # Debugging print
    return

def get_questions_and_answers():
    available_questions = [index for index, count in question_count.items() if count < 3]
    selected_indexes = random.sample(available_questions, min(4, len(available_questions)))
    for index in selected_indexes:
        question_count[index] += 1

    questions_and_answers = []
    for index in selected_indexes:
        question = df.loc[index, 'instruction']
        answers_with_models = [
            (df.loc[index, 'cidar_output'], 'CIDAR'),
            (df.loc[index, 'chat_output'], 'CHAT'),
            (df.loc[index, 'alpagasus_output'], 'ALPAGASUS')
        ]
        random.shuffle(answers_with_models)  # Shuffle answers with their IDs
        questions_and_answers.append((question, answers_with_models))

    return questions_and_answers

def rank_interface():
    questions = get_questions_and_answers()
    
    # Create three dropdowns for each question for 1st, 2nd, and 3rd choices
    inputs = []

    for question, answers in questions:
        # Use an HTML component to display the question
        inputs.append(gr.Markdown(rtl=True, value= question))
        
        answers_text = [answer for answer, _ in answers]

        # Append three dropdowns for rankings without repeating the question
        inputs.append(gr.Dropdown(elem_classes="rtl", choices=["...اختر"] + answers_text, label="الاختيار الأول"))
        inputs.append(gr.Dropdown(elem_classes="rtl", choices=["...اختر"] + answers_text, label="الاختيار الثاني"))
        inputs.append(gr.Dropdown(elem_classes="rtl", choices=["...اختر"] + answers_text, label="الاختيار الثالث"))

    outputs = gr.Textbox(elem_id="rtl_text")
 

    def rank_fluency(*dropdown_selections):
        user_rankings = []
        for i in range(0, len(dropdown_selections), 4):  # Process each set of 3 dropdowns for a question
            selections = dropdown_selections[i+1:i+4]
            # Check for duplicate selections within the same question
            unique_selections = set(tuple(selection) for selection in selections)

            # Now you can safely check if all sublists were unique
            if len(selections) != len(unique_selections):
                return "تأكد من عدم تكرار الإجابة لنفس السؤال"

            question_index = i // 4
            _, model_answers = questions[question_index]
            for j, chosen_answer in enumerate(selections, start=1):
                if chosen_answer == "...اختر":  # Skip unselected dropdowns
                    continue
                for model_answer, model in model_answers:
                    if model_answer == chosen_answer:
                        user_rankings.append((model, j))  # j is the rank (1, 2, or 3)
                        break
        process_rankings(user_rankings)
        return "سجلنا ردك، ما قصرت =)"

    return gr.Interface(fn=rank_fluency, inputs=inputs, outputs=outputs, title="ترتيب فصاحة النماذج",
                            description=".لديك مجموعة من الأسئلة، الرجاء ترتيب إجابات كل سؤال حسب جودة و فصاحة الإجابة", css=css)

iface = rank_interface()
iface.launch()