import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from uploads import add_new_eval

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@article{wei2024evaluating,
  title={Evaluating Copyright Takedown Methods for Language Models},
  author={Wei, Boyi and Shi, Weijia and Huang, Yangsibo and Smith, Noah A and Zhang, Chiyuan and Zettlemoyer, Luke and Li, Kai and Henderson, Peter},
  journal={arXiv preprint arXiv:2406.18664},
  year={2024}
}"""

api = HfApi()
TOKEN = os.environ.get("TOKEN", None)
LEADERBOARD_PATH = "boyiwei/CoTaEval_leaderboard"  # plain string; no interpolation needed


def restart_space():
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)


def format_floats(x):
    """Render floats with three decimal places; leave other values untouched."""
    if isinstance(x, float):
        return f"{x:.3f}"
    return x


# Load the results for one (model, dataset, setting, criteria) combination
# from its CSV file under versions/.
def baseline_load_data(model, dataset, setting, criteria):
    file_path = f"versions/{model}_{dataset}_{setting}_{criteria}.csv"
    df = pd.read_csv(file_path)
    df = df.applymap(format_floats)

    # Keep only the relevant columns, in a fixed display order.
    if dataset == "news":
        column_names = [
            "model_name", "method", "rouge1", "rougeL", "semantic_sim",
            "LCS(character)", "LCS(word)", "ACS(word)", "Levenshtein Distance",
            "Minhash Similarity", "MMLU", "MT-Bench", "Blocklisted F1",
            "In-Domain F1", "Efficiency",
        ]
    elif dataset == "books":
        column_names = [
            "model_name", "method", "bleu", "rouge1", "rougeL", "semantic_sim",
            "LCS(character)", "LCS(word)", "ACS(word)", "Levenshtein Distance",
            "Minhash Similarity", "MMLU", "MT-Bench", "Blocklisted rougeL",
            "In-Domain rougeL", "Efficiency",
        ]
    df = df[column_names]
    return df


# Keep the four dropdowns mutually consistent: the memorization setting is only
# available for the news dataset with the NewsQA-tuned model, and the books
# dataset is only available in the RAG setting. The argument order matches the
# inputs list in the (currently disabled) .change() wiring below.
def update_dropdowns(model, dataset, setting, criteria):
    updates = {
        "setting": gr.update(interactive=True),
        "dataset": gr.update(interactive=True),
        "model": gr.update(interactive=True),
        "criteria": gr.update(interactive=True),
    }
    if setting == "memorization":
        updates["dataset"] = gr.update(value="news", interactive=False)
        updates["model"] = gr.update(value="llama2-7b-chat-hf-newsqa", interactive=False)
    elif dataset == "books":
        updates["setting"] = gr.update(value="rag", interactive=False)
        if model == "llama2-7b-chat-hf-newsqa":
            updates["model"] = gr.update(value="llama2-7b-chat-hf", interactive=True)
    elif model == "llama2-7b-chat-hf-newsqa":
        updates["setting"] = gr.update(value="memorization", interactive=False)
        updates["dataset"] = gr.update(value="news", interactive=False)
    else:
        updates["setting"] = gr.update(value="rag", interactive=False)
    return updates["model"], updates["dataset"], updates["setting"], updates["criteria"]


def load_data(model, dataset, setting, criteria):
    baseline_df = baseline_load_data(model, dataset, setting, criteria)
    # Legacy multi-file loading, kept for reference: for every file in
    # versions/{model}-{version}/*.csv that is not "{model}-{version}.csv",
    # load it and append it to the dataframe.
    # version = version.replace("%", "p")
    # for file in os.listdir(f"versions/{model}-{version}"):
    #     if file == f"{model}-{version}.csv":
    #         continue
    #     df = pd.read_csv(f"versions/{model}-{version}/{file}")
    #     df = df[baseline_df.columns]
    #     baseline_df = pd.concat([baseline_df, df])
    return baseline_df


# Filter the leaderboard by method name. Note the column is named "method"
# (lowercase) in the CSVs, so 'Method' would raise a KeyError.
def search_leaderboard(df, query):
    if query == "":
        return df
    return df[df["method"].str.contains(query)]


# Reload the table when any dropdown changes.
def change_version(model, dataset, setting, criteria):
    new_df = load_data(model, dataset, setting, criteria)
    return new_df
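
# search_leaderboard is defined above but never wired into the UI. A minimal,
# hypothetical sketch of how it could be connected inside the Blocks layout
# below (the `search_box` component is an assumption, not part of this app):
#
# search_box = gr.Textbox(label="🔍 Search by method")
# search_box.change(
#     lambda model, dataset, setting, criteria, query: search_leaderboard(
#         load_data(model, dataset, setting, criteria), query
#     ),
#     inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown, search_box],
#     outputs=leaderboard_table,
# )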
# Initialize the Gradio app.
demo = gr.Blocks()

with demo:
    gr.Markdown("""
    ## 🥇 CoTaEval Leaderboard
    CoTaEval is a benchmark for evaluating the feasibility and side effects of copyright takedown methods for language models.
    Project website: [https://cotaeval.github.io/](https://cotaeval.github.io/).
    """)
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )
    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            with gr.Row():
                setting_dropdown = gr.Dropdown(
                    choices=["rag", "memorization"],
                    label="🔄 Select Setting",
                    value="rag",
                )
                dataset_dropdown = gr.Dropdown(
                    choices=["news", "books"],
                    label="🔄 Select Dataset",
                    value="news",
                )
                model_dropdown = gr.Dropdown(
                    choices=[
                        "llama2-7b-chat-hf",
                        "llama2-70b-chat-hf",
                        "dbrx-instruct",
                        "llama2-7b-chat-hf-newsqa",
                    ],
                    label="🔄 Select Model",
                    value="llama2-7b-chat-hf",
                )
                criteria_dropdown = gr.Dropdown(
                    choices=["mean", "max"],
                    label="🔄 Select Criteria",
                    value="mean",
                )
            leaderboard_table = gr.components.Dataframe(
                value=load_data("llama2-7b-chat-hf", "news", "rag", "mean"),
                interactive=True,
                visible=True,
            )

            # Dropdown-consistency wiring via update_dropdowns, currently disabled:
            # setting_dropdown.change(
            #     update_dropdowns,
            #     inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            #     outputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            # )
            # dataset_dropdown.change(
            #     update_dropdowns,
            #     inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            #     outputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            # )
            # model_dropdown.change(
            #     update_dropdowns,
            #     inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            #     outputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
            # )

            # Reload the table whenever any dropdown changes.
            setting_dropdown.change(
                change_version,
                inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
                outputs=leaderboard_table,
            )
            dataset_dropdown.change(
                change_version,
                inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
                outputs=leaderboard_table,
            )
            model_dropdown.change(
                change_version,
                inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
                outputs=leaderboard_table,
            )
            criteria_dropdown.change(
                change_version,
                inputs=[model_dropdown, dataset_dropdown, setting_dropdown, criteria_dropdown],
                outputs=leaderboard_table,
            )

            # Submission form (uses add_new_eval from uploads), currently disabled:
            # with gr.Accordion("Submit a new model for evaluation"):
            #     with gr.Row():
            #         with gr.Column():
            #             method_name_textbox = gr.Textbox(label="Method name")
            #             model_family_radio = gr.Radio(["llama", "phi"], value="llama", label="Model family")
            #             forget_rate_radio = gr.Radio(["1%", "5%", "10%"], value="10%", label="Forget rate")
            #             url_textbox = gr.Textbox(label="Url to model information")
            #         with gr.Column():
            #             organisation = gr.Textbox(label="Organisation")
            #             mail = gr.Textbox(label="Contact email")
            #             file_output = gr.File()
            #     submit_button = gr.Button("Submit Eval")
            #     submission_result = gr.Markdown()
            #     submit_button.click(
            #         add_new_eval,
            #         [
            #             method_name_textbox,
            #             model_family_radio,
            #             forget_rate_radio,
            #             url_textbox,
            #             file_output,
            #             organisation,
            #             mail,
            #         ],
            #         submission_result,
            #     )

    gr.Markdown("""
    ## Links
    - [**Website**](https://cotaeval.github.io): The website of the CoTaEval project.
    - [**GitHub Repository**](https://github.com/boyiwei/CoTaEval): Source code for evaluating takedown methods with CoTaEval.
    - [**Datasets**](https://huggingface.co/datasets/boyiwei/CoTaEval): Datasets for evaluation and unlearning.

    This leaderboard is based on the design of the [TOFU Leaderboard](https://huggingface.co/spaces/locuslab/tofu_leaderboard).
    """)
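
# The hourly restart below reloads the Space so that newly added result CSVs in
# versions/ are picked up. This assumes the app runs as a Hugging Face Space
# with a TOKEN secret authorized to restart the repo; without a valid token,
# api.restart_space will raise an authorization error.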
# Earlier scheduler/launch configuration, kept for reference:
# scheduler = BackgroundScheduler()
# scheduler.add_job(restart_space, "interval", seconds=1800)
# scheduler.start()
# demo.queue(default_concurrency_limit=40).launch()
# demo.launch()

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)  # restart every hour
scheduler.start()

custom_css = """
"""

# Note: launch() does not accept a custom_css argument; CSS is passed to
# gr.Blocks(css=...) instead.
# demo.launch(debug=True, custom_css=custom_css)
demo.launch(debug=True)
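
# To surface results for a new configuration, add a CSV named
# "{model}_{dataset}_{setting}_{criteria}.csv" under versions/ containing at
# least the columns selected in baseline_load_data; since the CSVs are re-read
# on every dropdown change (and on each scheduled restart), the new file is
# picked up without any code changes.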