"""Gradio leaderboard app for the ChineseSafe benchmark.

Loads the benchmark result tables from ``./data`` at import time, builds a
Gradio ``Blocks`` UI with two tabs (generation-based and perplexity-based
results), and refreshes both tables whenever the selected split or the
selected model sizes change.
"""
from typing import List, Optional

import gradio as gr
import numpy as np
import pandas as pd

from assets.text import (
    INTRODUCTION_TEXT,
    METRICS_TEXT,
    EVALUTION_TEXT,
    ACKNOWLEDGEMENTS_TEXT,
    REFERENCE_TEXT,
)

# Overall results (tab-separated files).
ORIGINAL_DF = pd.read_csv("./data/chinese_benchmark_gen.csv", sep='\t')
ORIGINAL_DF_PER = pd.read_csv("./data/chinese_benchmark_per.csv", sep='\t')

# Per-subclass results (comma-separated files).
ORIGINAL_DF_SUB_GEN = pd.read_csv("./data/subclass_gen.csv", sep=',')
ORIGINAL_DF_SUB_PER = pd.read_csv("./data/subclass_per.csv", sep=',')

METRICS = ["Accuracy", "Precision_Unsafe", "Recall_Unsafe", "Precision_Safe", "Recall_Safe", "None"]

SUBCLASS = ["Discrimination", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]

# Choices offered by the "Type" dropdown: the overall table plus one entry
# per subclass.
SPLITS = ["Overall", *SUBCLASS]

CLASSIFICATION = {
    "model_size": [
        ">65B",
        "~30B",
        "10B~20B",
        "5B~10B",
        "API",
    ]
}

_BIBTEX = """
@misc{zhang2024chinesesafechinesebenchmarkevaluating,
      title={ChineseSafe: A Chinese Benchmark for Evaluating Safety in Large Language Models},
      author={Hengxiang Zhang and Hongfu Gao and Qiang Hu and Guanhua Chen and Lili Yang and Bingyi Jing and Hongxin Wei and Bing Wang and Haifeng Bai and Lei Yang},
      year={2024},
      eprint={2410.18491},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2410.18491},
}
"""

_LAST_UPDATED = "November 18, 2024"

banner_url = "./assets/logo.png"
# NOTE(review): this literal looks like it lost its surrounding markup (it no
# longer references banner_url) -- confirm against the deployed app. The
# banner is not rendered anywhere in this file.
_BANNER = f'\nBanner\n'  # noqa


def retrieve_array_from_text(text):
    """Parse a bracketed, comma-separated string such as ``"[1, 2.5]"`` into a float array."""
    return np.fromstring(text.replace("[", "").replace("]", ""), dtype=float, sep=",")


def format_csv_numbers(text):
    """Return the part of ``text`` that precedes the first ``/``."""
    return text.split('/')[0]


def format_csv_numbers_second(text):
    """Split ``text`` on whitespace and return the list of pieces."""
    return text.split()


def format_number(x):
    """Round ``x`` to three significant digits and return it as a float."""
    return float(f"{x:.3}")


def _leaderboard_table(
    source: pd.DataFrame,
    model_size: List[str],
    subclass_choice: Optional[str] = None,
):
    """Build the read-only Dataframe component shown in a leaderboard tab.

    Args:
        source: Result table; must contain a ``Size`` column.
        model_size: Values of ``Size`` to keep. An empty list yields an
            empty table.
        subclass_choice: When given, only the ``Model`` column and the
            ``<subclass>_Accuracy`` / ``_Precision`` / ``_Recall`` columns
            are kept.

    Returns:
        A non-interactive, visible ``gr.components.Dataframe``.

    Raises:
        KeyError: If ``source`` lacks ``Size`` or one of the requested
            subclass columns.
    """
    df = source[source['Size'].isin(model_size)].drop(columns="Size")
    if subclass_choice is not None:
        df = df[
            [
                "Model",
                subclass_choice + "_Accuracy",
                subclass_choice + "_Precision",
                subclass_choice + "_Recall",
            ]
        ]
    return gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )


def get_dataset_csv(
    model_size: List[str],
):
    """Return the overall generation table filtered to ``model_size``."""
    return _leaderboard_table(ORIGINAL_DF, model_size)


def get_dataset_csv_per(
    model_size: List[str],
):
    """Return the overall perplexity table filtered to ``model_size``."""
    return _leaderboard_table(ORIGINAL_DF_PER, model_size)


def get_dataset_csv_sub_gen(
    model_size: List[str],
    subclass_choice: str,
):
    """Return the generation table for one subclass, filtered to ``model_size``."""
    return _leaderboard_table(ORIGINAL_DF_SUB_GEN, model_size, subclass_choice)


def get_dataset_csv_sub_per(
    model_size: List[str],
    subclass_choice: str,
):
    """Return the perplexity table for one subclass, filtered to ``model_size``."""
    return _leaderboard_table(ORIGINAL_DF_SUB_PER, model_size, subclass_choice)


def get_dataset_classfier_gen(
    model_size: List[str],
    main_choice: str,
):
    """Pick the generation table for the selected split.

    ``"Overall"`` selects the overall table; any other value is treated as a
    subclass name. Using a plain ``else`` guarantees a table is always
    returned (the previous ``elif`` left the result unbound for one value)
    and keeps this function consistent with ``get_dataset_classfier_per``.
    """
    if main_choice == "Overall":
        return get_dataset_csv(model_size)
    return get_dataset_csv_sub_gen(model_size, main_choice)


def get_dataset_classfier_per(
    model_size: List[str],
    main_choice: str,
):
    """Pick the perplexity table for the selected split.

    ``"Overall"`` selects the overall table; any other value is treated as a
    subclass name.
    """
    if main_choice == "Overall":
        return get_dataset_csv_per(model_size)
    return get_dataset_csv_sub_per(model_size, main_choice)


with gr.Blocks() as demo:
    # NOTE(review): the title text appears to have lost its heading markup --
    # confirm against the deployed app.
    gr.Markdown("\n\nChineseSafe Leaderboard\n\n", elem_classes="markdown-text")

    with gr.Row():
        # The banner image (banner_url) is intentionally not displayed here.
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        gr.Markdown(METRICS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        gr.Markdown(EVALUTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        # Gradio expects an integer ``scale``; a fractional value triggers a
        # warning, so the narrow column uses 1 instead of 0.8.
        with gr.Column(scale=1):
            main_choice = gr.Dropdown(
                choices=SPLITS,
                value="Overall",
                label="Type",
                info="Please choose the type to display.",
            )

        with gr.Column(scale=10):
            model_choice = gr.CheckboxGroup(
                choices=CLASSIFICATION["model_size"],
                value=CLASSIFICATION["model_size"],  # every size selected by default
                label="Model Size",
                info="Please choose the model size to display.",
            )

    # Result tables; both start empty and are filled by the load event below.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Generation", elem_id="od-benchmark-tab-table", id=6):
            dataframe_all_gen = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )

        with gr.TabItem("🏅 Perplexity", elem_id="od-benchmark-tab-table", id=5):
            dataframe_all_per = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )

    with gr.Row():
        gr.Markdown(ACKNOWLEDGEMENTS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        gr.Markdown(REFERENCE_TEXT, elem_classes="markdown-text")

    # Citation box.
    with gr.Row():
        with gr.Accordion("📙 Citation", open=True):
            gr.Textbox(
                value=_BIBTEX,
                lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True
            )

    gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")

    # Refresh each table when the split changes, when the size filter
    # changes, and once on page load. Registration order matches the
    # original: perplexity first, then generation.
    for handler, table in (
        (get_dataset_classfier_per, dataframe_all_per),
        (get_dataset_classfier_gen, dataframe_all_gen),
    ):
        for register in (main_choice.change, model_choice.change, demo.load):
            register(
                fn=handler,
                inputs=[model_choice, main_choice],
                outputs=table,
            )

demo.launch(share=True)