import json
import datetime
from pathlib import Path
import uuid
from typing import Tuple

import pandas as pd
import gradio as gr
from datasets import load_dataset
from huggingface_hub import CommitScheduler

# Hugging Face dataset repositories that annotations are uploaded to.
# (Kept as a list so the target can be switched while testing.)
OUTPUT_DATASET = [
    "team-hatakeyama-phase2/annotation_tanuki_phase2",
    "kevineen/Tanuki-Phase2-annotation-dataset",  # output test
]

# Datasets that can be selected for annotation.
ANNOTATION_DATASET = [
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA",
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA-other",
    "kanhatakeyama/ChatbotArenaJaMixtral8x22b",
    "kanhatakeyama/OrcaJaMixtral8x22b",
    "kanhatakeyama/LogicalDatasetsByMixtral8x22b",
    # Data formats not supported yet (support planned):
    # "hatakeyama-llm-team/WikiBookJa",
    # "kanhatakeyama/AutoWikiQA",
    # "susumuota/SyntheticTextWikiTranslate-askllm-v1",  # Ask-LLM translation
    # Nemotron data (for future use)
]

MULTI_TURN_ANNOTATION_DATASET = [
    # Multi-turn datasets: not supported yet.
    "kanhatakeyama/AutoMultiTurnByMixtral8x22b",
]

# Session state (intended as per-browser-session variables) ===================
# States used to enable/disable parts of the UI.
is_selected_dataset = gr.State(False)
is_loaded_dataset = gr.State(False)

# List of datasets offered in the dropdown.
dropdown_dataset_list = gr.State(value=ANNOTATION_DATASET)

# Currently selected dataset name; the initial value is the first entry,
# "hatakeyama-llm-team/AutoGeneratedJapaneseQA".
select_dropdown_dataset = gr.State(dropdown_dataset_list.value[0])

select_dataset = gr.State(None)  # currently loaded dataset
select_dataset_total_len = gr.State(0)  # length of the current dataset
select_idx = gr.State(0)  # current index (maybe unnecessary if a random mode is adopted?)
# random_mode = gr.State(False)  # random mode

# Column names of the in-memory table holding the annotations made so far.
_ANNOTATED_COLUMNS = (
    'dataset_name',  # dataset the row was taken from
    'dataset_id',  # index of the row inside that dataset
    'who',  # annotator name
    'unknown_quality',  # "cannot tell"
    'good',  # good
    'bad',  # bad
    'is_proofreading_1',  # whether the answer text was edited
    'answer_text_1',
    'is_proofreading_2',  # for the second turn
    'answer_text_2',  # for the second turn
    # Five-point ratings
    'score',  # overall
    'helpfulness',
    'correctness',
    'coherence',
    'complexity',
    'verbosity',
    'humor',  # (SteerLM)
    'creativity',  # (SteerLM)
    'appropriate',  # (SteerLM uses the negated form)
    'following_instructions',  # whether given instructions were followed (SteerLM fails_task)
    'politeness',  # (minnadechat)
    'harmfulness',  # (minnadechat)
    # Presence flags.
    # NOTE(review): the note at this position read "0: unknown (unset),
    # 1: absent, 2: present" - confirm against the code that encodes the
    # radio buttons before relying on it.
    'typos',  # typos / omissions (minnadechat)
    'hate',
    'sexual',
    'violence',
    'suicide',
    'threat',  # crime
    'gun',  # heavy firearms
    'controlled_substance',
    'criminal_planing',
    'privacy',  # personal information
    'harassment',
    'profanity',
    'political_content',  # (SteerLM)
    'moral_judgement',  # immoral content (SteerLM)
)

# Table of annotations produced by the annotator; starts with no rows.
annotated_dataset = gr.State(
    value=pd.DataFrame({column: [] for column in _ANNOTATED_COLUMNS})
)

is_dataset_loaded = gr.State(value=False)  # dataset load status

you_dataset_id = gr.State(value=0)  # ID of the item the annotator is working on
dataset_name = gr.State(value="")  # name of the dataset used for editing
dataset_id = gr.State(value=0)  # index in the source dataset
who = gr.State(value="")  # annotator name

# Simple mode
unknown_quality = gr.State(value=False)  # cannot tell (unset)
good = gr.State(value=False)  # good
bad = gr.State(value=False)  # bad

# Answer texts as first shown, used to detect whether they were edited.
initial_answer_text_1 = gr.State(value="")
initial_answer_text_2 = gr.State(value="")

is_proofreading_1 = gr.State(value=False)  # answer 1 was edited
answer_text_1 = gr.State(value="")  # answer 1 text
is_proofreading_2 = gr.State(value=False)  # answer 2 was edited
answer_text_2 = gr.State(value="")  # answer 2 text

# Five-point ratings.
# 0 is used as the "not judged / hard to judge" value.
score = gr.State(value=3)  # overall score, starts at 3 (minimum required field)
helpfulness = gr.State(value=0)
correctness = gr.State(value=0)
coherence = gr.State(0)  # coherence
complexity = gr.State(0)  # complexity
verbosity = gr.State(0)  # verbosity
humor = gr.State(0)  # humor (SteerLM)
creativity = gr.State(0)  # creativity (SteerLM)
appropriate = gr.State(0)  # appropriateness (not_appropriate in SteerLM)
following_instructions = gr.State(0)  # instruction following (SteerLM fails_task)
politeness = gr.State(0)  # politeness (minnadechat)
harmfulness = gr.State(0)  # harmfulness (minnadechat)

# Presence flags.
# 0 is used as the "not judged / hard to judge" value.
hate = gr.State(0)  # hate
sexual = gr.State(0)  # sexual content
violence = gr.State(0)  # violence
suicide = gr.State(0)  # suicide
threat = gr.State(0)  # crime
gun = gr.State(0)  # guns / heavy firearms
controlled_substance = gr.State(0)  # controlled substances
criminal_planing = gr.State(0)  # criminal planning
privacy = gr.State(0)  # personal / private information
harassment = gr.State(0)  # harassment
profanity = gr.State(0)  # profanity
political_content = gr.State(0)  # political content (SteerLM)
moral_judgement = gr.State(0)  # immoral content (SteerLM)
typos = gr.State(0)  # typos / omissions (minnadechat)

# NOTE(review): every handler below reads and writes these State objects
# through `.value` instead of receiving them as event inputs/outputs, so the
# values live in these module-level objects - presumably shared by every
# browser session of the process. Confirm that this is acceptable.

# Detail-rating states, in the order of the detail sliders and of the
# corresponding `save_annotation` parameters.
_DETAIL_SCORE_STATES = (
    helpfulness, correctness, coherence, complexity, verbosity, humor,
    creativity, appropriate, following_instructions, politeness, harmfulness,
)

# Presence-flag states, in the order of the radio inputs of `eval_submit` and
# of the corresponding `save_annotation` parameters.
_FLAG_STATES = (
    hate, sexual, violence, suicide, threat, gun, controlled_substance,
    criminal_planing, privacy, harassment, profanity, political_content,
    moral_judgement, typos,
)


# Dataset loading ========================================
def dataset_load_fn() -> tuple:
    """Load the selected dataset, shuffle it and return the first item.

    The "train" split is converted to a DataFrame, the original row number is
    kept in an ``index`` column, and the rows are shuffled. The shuffled frame
    replaces the "train" entry of the loaded dataset.

    Returns:
        A 45-element tuple matching the outputs of the load button: the first
        question/answer pair three times (one pair per tab), followed by 39
        ``gr.update(interactive=True)`` values that enable the controls.
    """
    is_dataset_loaded.value = False  # reset the load status

    select_dataset.value = load_dataset(select_dropdown_dataset.value)

    # Convert the Dataset object to a pandas DataFrame.
    df = select_dataset.value["train"].to_pandas()

    # Keep the original row number as an "index" column, then shuffle.
    df = df.reset_index(drop=False)
    df = df.sample(frac=1).reset_index(drop=True)

    select_dataset.value["train"] = df  # store the shuffled frame
    select_idx.value = 0
    select_dataset_total_len.value = len(df)
    is_dataset_loaded.value = True

    # Remember the answer as first shown so later edits can be detected.
    first_row = df.iloc[select_idx.value]
    initial_answer_text_1.value = first_row["answer"]
    initial_answer_text_2.value = first_row["answer"]

    texts = [first_row["question"], first_row["answer"]] * 3
    # 39 = 45 output components of the load button minus the 6 text values.
    return (*texts, *(gr.update(interactive=True) for _ in range(39)))


# Saving annotations ========================================
# On a Space, files end up in the CommitScheduler `path_in_repo` folder.
# (During local development a json file is created under ./user_annotation.)
annotation_file = Path("user_annotation/") / f"data_{uuid.uuid4()}.json"
annotated_folder = annotation_file.parent

# NOTE(review): index 1 of OUTPUT_DATASET is the entry commented as the
# output-test repository - confirm this is the intended upload target.
scheduler = CommitScheduler(
    repo_id=OUTPUT_DATASET[1],
    repo_type="dataset",
    folder_path=annotated_folder,
    path_in_repo="data",  # destination folder on the Space
    private=True,
    every=5,  # upload every 5 minutes
)

# Integer code for each radio choice.
_CHECKBOX_CODES = {"不明": 0, "有": 1, "無": 2}

# Fields of a saved record that are written as-is; every other field is
# passed through int() before JSON serialisation.
_NON_INT_FIELDS = frozenset({
    "dataset_name", "who", "unknown_quality", "good", "bad",
    "is_proofreading_1", "answer_text_1", "is_proofreading_2", "answer_text_2",
})


def checkbox_to_int(checkbox_value) -> int:
    """Convert a radio choice ("不明" / "有" / "無") to 0 / 1 / 2.

    Any other value prints an error marker and is treated as 0.
    """
    try:
        return _CHECKBOX_CODES[checkbox_value]
    except (KeyError, TypeError):  # unknown or unhashable value
        print("error: ")
        return 0


def save_annotation(
    dataset_name: str,
    dataset_id: int,
    who: str,
    unknown_quality: bool,
    good: bool,
    bad: bool,
    is_proofreading_1: bool,
    answer_text_1: str,
    is_proofreading_2: bool,
    answer_text_2: str,
    score: int,
    helpfulness: int,
    correctness: int,
    coherence: int,
    complexity: int,
    verbosity: int,
    humor: int,
    creativity: int,
    appropriate: int,
    following_instructions: int,
    politeness: int,
    harmfulness: int,
    hate: int,
    sexual: int,
    violence: int,
    suicide: int,
    threat: int,
    gun: int,
    controlled_substance: int,
    criminal_planing: int,
    privacy: int,
    harassment: int,
    profanity: int,
    political_content: int,
    moral_judgement: int,
    typos: int,
) -> None:
    """Record one annotation in memory and append it to the upload file.

    The values are appended as one row to ``annotated_dataset`` and written
    as one JSON line (with a ``datetime`` field) to ``annotation_file`` while
    holding the scheduler lock.

    Both destinations are built from the same mapping, so the in-memory row
    always carries the same columns as the JSON record (including
    ``coherence`` and ``typos``).
    """
    record = {
        "dataset_name": dataset_name,
        "dataset_id": dataset_id,
        "who": who,
        "unknown_quality": unknown_quality,
        "good": good,
        "bad": bad,
        "is_proofreading_1": is_proofreading_1,
        "answer_text_1": answer_text_1,
        "is_proofreading_2": is_proofreading_2,
        "answer_text_2": answer_text_2,
        "score": score,
        "helpfulness": helpfulness,
        "correctness": correctness,
        "coherence": coherence,
        "complexity": complexity,
        "verbosity": verbosity,
        "humor": humor,
        "creativity": creativity,
        "appropriate": appropriate,
        "following_instructions": following_instructions,
        "politeness": politeness,
        "harmfulness": harmfulness,
        "hate": hate,
        "sexual": sexual,
        "violence": violence,
        "suicide": suicide,
        "threat": threat,
        "gun": gun,
        "controlled_substance": controlled_substance,
        "criminal_planing": criminal_planing,
        "privacy": privacy,
        "harassment": harassment,
        "profanity": profanity,
        "political_content": political_content,
        "moral_judgement": moral_judgement,
        "typos": typos,
    }

    new_row = pd.DataFrame({key: [value] for key, value in record.items()})
    annotated_dataset.value = pd.concat(
        [annotated_dataset.value, new_row], ignore_index=True
    ).reset_index(drop=True)

    # Numeric fields go through int() so that values such as the numpy
    # integer read from the DataFrame can be serialised by json.
    data_to_write = {"datetime": str(datetime.datetime.now().isoformat())}
    for key, value in record.items():
        data_to_write[key] = value if key in _NON_INT_FIELDS else int(value)

    # Write under the scheduler lock so an upload never sees a partial line.
    with scheduler.lock:
        with annotation_file.open("a", encoding='utf-8') as f:
            f.write(json.dumps(data_to_write, ensure_ascii=False))
            f.write("\n")


# UI helpers ========================================
def hello(profile: gr.OAuthProfile | None) -> Tuple[str, str]:
    """Return a greeting for the logged-in user and the current annotator name.

    When a profile is given, its username also becomes the annotator name.
    """
    if profile is None:
        return "プライベートデータセット取得のためにログインしてください。", who.value
    who.value = profile.username
    return f'{profile.username} さん、よろしくお願いいたします。', who.value


# Theme
theme_ = gr.themes.Default()


def load_css():
    """Return the contents of ``style.css`` (hook for later CSS design changes)."""
    with open("style.css", "r", encoding="utf-8") as file:
        return file.read()


def _score_slider(label: str):
    """Create a disabled 0-5 detail slider whose initial value is 0."""
    return gr.Slider(label=label, minimum=0, value=0, maximum=5, step=1,
                     interactive=False)


def _flag_radio(label: str):
    """Create a disabled three-choice radio whose initial value is "不明"."""
    return gr.Radio(label=label, choices=["不明", "有", "無"], value="不明",
                    interactive=False)


# Gradio screen ============================================
# NOTE(review): the nesting of components inside Row/Column containers below
# is a best-effort reading of the `with` order - confirm the rendered layout.
with gr.Blocks(theme=theme_, css=load_css()) as demo:
    gr.Markdown("# データセット アノテーション for Tanuki (Phase2)")
    with gr.Row():
        gr.Markdown(
            "### GENIACにて開発中のLLM用データセットに対してアノテーションするSpaceです\n "
            "入力されたデータは使用される可能性があるため、個人情報・秘匿情報などは入力しないでください。\n "
            "テスト中です。"
        )
        gr_who = gr.TextArea(value=who.value, lines=1, max_lines=1,
                             label="ユーザー名 (入力してください)")

    def change_name(name: str):
        """Store the typed annotator name."""
        who.value = name

    gr_who.change(change_name, inputs=[gr_who], outputs=[])

    with gr.Tab("アノテーション (シングルターン)"):
        # HF login feature (disabled)
        # with gr.Row(equal_height=True):
        #     gr.LoginButton(value="HuggingFace ログイン",
        #                    logout_value="HuggingFace ログアウト", scale=1)
        #     # user name
        #     gr_profile_name = gr.Markdown()
        #     demo.load(hello, inputs=None, outputs=[gr_profile_name, who])

        with gr.Row():
            def dropdown_select(select_value) -> None:
                """Remember which dataset is selected in the dropdown."""
                select_dropdown_dataset.value = select_value

            # Target dataset selection
            gr_dropdown_dataset = gr.Dropdown(
                label="データセット選択 ①",
                choices=dropdown_dataset_list.value,
                value=select_dropdown_dataset.value,
                elem_id="dataset_sel",
                scale=2)
            gr_dropdown_dataset.change(dropdown_select,
                                       inputs=[gr_dropdown_dataset])

            gr_data_load_btn = gr.Button("② データセットを読み込む")

        with gr.Column() as content_column:
            with gr.Tab("③ シンプル(良・悪)"):
                with gr.Column():
                    with gr.Row(equal_height=True):
                        gr_good_btn = gr.Button("良い", interactive=False)
                        # "cannot tell" button
                        gr_unknown_btn = gr.Button("分からない", interactive=False)
                        gr_bad_btn = gr.Button("悪い", interactive=False)
                    gr_question_text_1_1 = gr.Textbox(
                        label="質問: ", lines=5, interactive=False)
                    gr_answer_text_1_1 = gr.Textbox(
                        label="回答: 訂正して頂けると、品質が上がります。",
                        lines=20, interactive=False)

            with gr.Tab("③ 5段階評価(シンプル)"):
                with gr.Row() as simple_score_btn:
                    gr_score_5_btn = gr.Button("5: 高品質", interactive=False)
                    gr_score_4_btn = gr.Button("4: 良い", interactive=False)
                    gr_score_3_btn = gr.Button("3: 普通", interactive=False)
                    gr_score_2_btn = gr.Button("2: 悪い", interactive=False)
                    gr_score_1_btn = gr.Button("1: 低品質", interactive=False)
                gr_question_text_2_1 = gr.Textbox(
                    label="質問: ", lines=5, interactive=False)
                gr_answer_text_2_1 = gr.Textbox(
                    label="回答: 訂正して頂けると、品質が上がります。",
                    lines=20, interactive=False)

            with gr.Tab("③ 5段階評価 (詳細)"):
                with gr.Row():
                    with gr.Column() as EvalFive:
                        gr_question_text_3_1 = gr.Textbox(
                            label="質問: ", lines=10, interactive=False)
                        # gr_explain = gr.Markdown("回答")
                        gr_answer_text_3_1 = gr.Textbox(
                            label="回答: 訂正して頂けると、品質が上がります。",
                            lines=35, interactive=False)

                    with gr.Column() as EvalFiveDetail:
                        with gr.Row():
                            gr_submit_score = gr.Button("評価送信", interactive=False)
                            gr_score_reset = gr.Button("スコアリセット", interactive=False)
                        gr_score_detail = gr.Slider(
                            label="総合スコア 【必須】", value=3, minimum=1,
                            maximum=5, step=1, interactive=False)
                        gr_eval_annotation_explain = gr.Markdown(
                            "詳細アノテーション (5点満点)")
                        with gr.Row():
                            gr_helpfulness = _score_slider("有用性")
                            gr_correctness = _score_slider("正確さ")
                            gr_coherence = _score_slider("一貫性")
                            gr_complexity = _score_slider("複雑さ")
                        with gr.Row():
                            gr_verbosity = _score_slider("冗長性")
                            gr_humor = _score_slider("ユーモア")
                            gr_creativity = _score_slider("創造性")
                            gr_appropriate = _score_slider("適切性")
                        with gr.Row():
                            gr_following_instructions = _score_slider("忠実性")
                            gr_politeness = _score_slider("礼儀正しさ")
                            gr_harmfulness = _score_slider("有害性")
                            gr_text_4 = gr.Markdown(
                                "判断可能な物のみ\n\nスコアを付けてください。")
                        gr_text_5 = gr.Markdown("")
                        with gr.Row():
                            gr_typos = _flag_radio("誤字・脱字")
                            gr_moral_judgement = _flag_radio("非道徳")
                        with gr.Row():
                            gr_hate = _flag_radio("ヘイト")
                            gr_sexual = _flag_radio("性的内容")
                        with gr.Row():
                            gr_violence = _flag_radio("暴力的")
                            gr_suicide = _flag_radio("自殺行為")
                        with gr.Row():
                            gr_threat = _flag_radio("犯罪")
                            gr_gun = _flag_radio("銃等")
                        with gr.Row():
                            gr_controlled_substance = _flag_radio("規制対象物質")
                            gr_criminal_planing = _flag_radio("犯罪計画")
                        with gr.Row():
                            gr_privacy = _flag_radio("個人情報")
                            gr_harassment = _flag_radio("ハラスメント")
                        with gr.Row():
                            gr_profanity = _flag_radio("冒涜行為")
                            gr_political_content = _flag_radio("政治的内容")

    # Component groups shared by the event wiring ----------------------------
    # Question/answer textboxes of the three tabs.
    text_outputs = [
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
    ]
    # Detail sliders, in the order of _DETAIL_SCORE_STATES.
    detail_sliders = [
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
    ]
    # Flag radios, in the order of _FLAG_STATES.
    flag_radios = [
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ]
    # Everything reset by score_reset_display (26 components).
    score_components = [gr_score_detail, *detail_sliders, *flag_radios]
    # Outputs of every annotation action: next item + reset ratings (32).
    annotation_outputs = [*text_outputs, *score_components]

    # Event handlers ---------------------------------------------------------
    def _advance_index(df) -> None:
        """Move to the next row, wrapping around to the first at the end."""
        select_idx.value += 1
        if select_idx.value >= len(df):
            select_idx.value = 0

    def _current_row_updates(df) -> list:
        """Build question/answer updates of the current row for the three tabs."""
        row = df.iloc[select_idx.value]
        updates = []
        for _ in range(3):  # fresh update objects for every tab
            updates.append(gr.update(value=row["question"]))
            updates.append(gr.update(value=row["answer"]))
        return updates

    # State update for the detailed five-point evaluation.
    def eval_submit(
        # text
        g_answer_text_3_1: str,
        # sliders
        g_score: int,
        g_helpfulness: int,
        g_correctness: int,
        g_coherence: int,
        g_complexity: int,
        g_verbosity: int,
        g_humor: int,
        g_creativity: int,
        g_appropriate: int,
        g_following_instructions: int,
        g_politeness: int,
        g_harmfulness: int,
        # radio buttons
        g_hate: str,
        g_sexual: str,
        g_violence: str,
        g_suicide: str,
        g_threat: str,
        g_gun: str,
        g_controlled_substance: str,
        g_criminal_planing: str,
        g_privacy: str,
        g_harassment: str,
        g_profanity: str,
        g_political_content: str,
        g_moral_judgement: str,
        g_typos: str,
    ):
        """Save a detailed evaluation of the shown item and show the next one.

        The annotation is saved before the index is advanced, so it is stored
        under the ``index`` of the row that was actually displayed.

        Returns:
            Updates for the six text boxes followed by the reset rating
            values (32 values, matching ``annotation_outputs``).
        """
        good.value = False
        bad.value = False
        unknown_quality.value = False

        # Record the answer text only when the annotator changed it.
        if initial_answer_text_1.value != g_answer_text_3_1:
            is_proofreading_1.value = True
            answer_text_1.value = g_answer_text_3_1
        else:
            is_proofreading_1.value = False
            answer_text_1.value = ""

        score.value = g_score
        slider_values = (
            g_helpfulness, g_correctness, g_coherence, g_complexity,
            g_verbosity, g_humor, g_creativity, g_appropriate,
            g_following_instructions, g_politeness, g_harmfulness,
        )
        for state, value in zip(_DETAIL_SCORE_STATES, slider_values):
            state.value = value
        radio_values = (
            g_hate, g_sexual, g_violence, g_suicide, g_threat, g_gun,
            g_controlled_substance, g_criminal_planing, g_privacy,
            g_harassment, g_profanity, g_political_content,
            g_moral_judgement, g_typos,
        )
        for state, value in zip(_FLAG_STATES, radio_values):
            state.value = checkbox_to_int(value)

        df = select_dataset.value["train"]
        save_annotation_data(df)  # save for the row currently shown
        _advance_index(df)
        initialize_next_data(df)
        return (*_current_row_updates(df), *score_reset_display())

    gr_submit_score.click(
        eval_submit,
        inputs=[gr_answer_text_3_1, *score_components],
        outputs=annotation_outputs,
    )

    def score_button_clicked(button_value):
        """Clear the simple-mode flags and store the clicked score."""
        good.value = False
        bad.value = False
        unknown_quality.value = False
        score.value = button_value

    def update_annotation(
        input_ans_1: str | None = None,
        input_ans_2: str | None = None,
        is_good: bool | None = None,  # good/bad flag
        is_unknown: bool | None = None,  # "cannot tell" flag
        is_simple: bool | None = None,
        score_value: int | None = None,  # five-point value; None for good/bad
    ) -> tuple:
        """Save a simple or five-point evaluation and show the next item.

        The annotation is saved before the index is advanced, so it is stored
        under the ``index`` of the row that was actually displayed.

        Returns:
            Updates for the six text boxes followed by the reset rating
            values (32 values, matching ``annotation_outputs``).
        """
        update_evaluation_state(is_good, is_unknown, score_value)
        update_answer_state(input_ans_1, input_ans_2)

        # A simple-mode click stores 0 as the score.
        if is_simple:
            score.value = 0

        df = select_dataset.value["train"]
        save_annotation_data(df)  # save for the row currently shown
        _advance_index(df)
        initialize_next_data(df)
        return (*_current_row_updates(df), *score_reset_display())

    def update_evaluation_state(is_good, is_unknown, score_value):
        """Set the good/bad/unknown flags and, for a score click, the score."""
        if score_value is not None:
            # Five-point evaluation
            good.value = False
            bad.value = False
            unknown_quality.value = False
            score.value = score_value
        else:
            # good / bad / unknown evaluation
            good.value = is_good
            bad.value = not is_good if not is_unknown else False
            unknown_quality.value = is_unknown

    def update_answer_state(input_ans_1, input_ans_2):
        """Store each answer text only if it differs from the text first shown."""
        if input_ans_1 is not None and initial_answer_text_1.value != input_ans_1:
            is_proofreading_1.value = True
            answer_text_1.value = input_ans_1
        else:
            answer_text_1.value = ""

        # Second turn
        if input_ans_2 is not None and initial_answer_text_2.value != input_ans_2:
            is_proofreading_2.value = True
            answer_text_2.value = input_ans_2
        else:
            answer_text_2.value = ""

    def save_annotation_data(df):
        """Save the current state values for the row at ``select_idx``.

        The dataset id is the row's ``index`` column, i.e. its position in
        the dataset before shuffling.
        """
        save_annotation(
            select_dropdown_dataset.value,
            df.iloc[select_idx.value]['index'],
            who.value,
            unknown_quality.value,
            good.value,
            bad.value,
            is_proofreading_1.value,
            answer_text_1.value,
            is_proofreading_2.value,
            answer_text_2.value,
            score.value,
            *(state.value for state in _DETAIL_SCORE_STATES),
            *(state.value for state in _FLAG_STATES),
        )

    def initialize_next_data(df):
        """Reset the edit flags and remember the new row's answer as the baseline."""
        is_proofreading_1.value = False
        is_proofreading_2.value = False
        initial_answer_text_1.value = df.iloc[select_idx.value]["answer"]
        initial_answer_text_2.value = df.iloc[select_idx.value]["answer"]

    def unknown_click(input_ans_1, input_ans_2):
        """Handle the "cannot tell" button."""
        good.value = False
        bad.value = False
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=False,
            is_unknown=True,
            is_simple=True,
        )

    simple_inputs = [gr_answer_text_1_1, gr_answer_text_2_1]

    gr_unknown_btn.click(unknown_click, inputs=simple_inputs,
                         outputs=annotation_outputs)

    def good_click(input_ans_1, input_ans_2):
        """Handle the "good" button."""
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=True,
            is_unknown=False,
            is_simple=True,
        )

    gr_good_btn.click(good_click, inputs=simple_inputs,
                      outputs=annotation_outputs)

    def bad_click(input_ans_1, input_ans_2):
        """Handle the "bad" button."""
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=False,
            is_unknown=False,
            is_simple=True,
        )

    gr_bad_btn.click(bad_click, inputs=simple_inputs,
                     outputs=annotation_outputs)

    def _make_score_handler(score_value: int):
        """Return the click handler that records ``score_value``."""
        def handler(answer_text):
            return update_annotation(input_ans_1=answer_text,
                                     is_unknown=False,
                                     score_value=score_value)
        return handler

    # Click events of the five-point buttons.
    for _button, _value in (
        (gr_score_1_btn, 1),
        (gr_score_2_btn, 2),
        (gr_score_3_btn, 3),
        (gr_score_4_btn, 4),
        (gr_score_5_btn, 5),
    ):
        _button.click(_make_score_handler(_value),
                      inputs=[gr_answer_text_2_1],
                      outputs=annotation_outputs)

    def score_reset_display():
        """Reset all rating states and return the matching display values.

        Returns:
            26 values for ``score_components``: 3 for the overall score, 0
            for each detail slider and "不明" for each flag radio.
        """
        score.value = 3
        for state in (*_DETAIL_SCORE_STATES, *_FLAG_STATES):
            state.value = 0
        return (
            3,
            *([0] * len(_DETAIL_SCORE_STATES)),
            *(["不明"] * len(_FLAG_STATES)),
        )

    gr_score_reset.click(score_reset_display, inputs=[],
                         outputs=score_components)

    # Data loading: sets the texts and makes the controls interactive.
    gr_data_load_btn.click(
        dataset_load_fn,
        inputs=None,
        outputs=[
            *text_outputs,
            gr_answer_text_1_1,
            gr_answer_text_2_1,
            gr_answer_text_3_1,
            gr_unknown_btn,
            gr_good_btn,
            gr_bad_btn,
            gr_score_5_btn,
            gr_score_4_btn,
            gr_score_3_btn,
            gr_score_2_btn,
            gr_score_1_btn,
            gr_submit_score,
            gr_score_detail,
            gr_score_reset,
            *detail_sliders,
            *flag_radios,
        ],
    )

    # TODO: show the list of already annotated items when switching tabs.
    # with gr.Tab("アノテ済みデータセット(管理画面)"):
    #     refresh the displayed data when the tab is switched

if __name__ == "__main__":
    demo.launch()