import json
import datetime
from pathlib import Path
import uuid
from typing import Tuple

import pandas as pd

import gradio as gr
from datasets import load_dataset
from huggingface_hub import CommitScheduler

# Hugging Face dataset repositories that annotations can be uploaded to.
# (Kept as a list so the target can be switched by index while testing.)
OUTPUT_DATASET = [
    "team-hatakeyama-phase2/annotation_tanuki_phase2",
    "kevineen/Tanuki-Phase2-annotation-dataset", # output test
]

# Datasets offered for annotation (single-turn question/answer data)
ANNOTATION_DATASET = [
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA",
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA-other",
    "kanhatakeyama/ChatbotArenaJaMixtral8x22b",
    "kanhatakeyama/OrcaJaMixtral8x22b",
    "kanhatakeyama/LogicalDatasetsByMixtral8x22b",

    # Data format not supported yet (support planned)
    # "hatakeyama-llm-team/WikiBookJa",
    # "kanhatakeyama/AutoWikiQA",
    # "susumuota/SyntheticTextWikiTranslate-askllm-v1", # Ask-LLM translation
    
    # Nemotron data (for future use)
]

MULTI_TURN_ANNOTATION_DATASET = [
    # Multi-turn data: not supported yet
    "kanhatakeyama/AutoMultiTurnByMixtral8x22b",
]

# Session State : (intended as per-browser-session variables) ===========================
# NOTE(review): the handlers in this file read and write these objects through
# `.value` directly instead of receiving them as event inputs/outputs.
# Presumably that makes the values shared by every session served by this
# process -- confirm against the Gradio State documentation.

# States used to enable/disable UI elements
is_selected_dataset = gr.State(False)
# NOTE(review): looks like a duplicate of `is_dataset_loaded` defined further
# down; it is not referenced in the visible part of the file -- confirm.
is_loaded_dataset = gr.State(False)

# List of datasets offered in the dropdown
dropdown_dataset_list = gr.State(value=ANNOTATION_DATASET)

# Name of the currently selected dataset; starts as the first list entry
# ("hatakeyama-llm-team/AutoGeneratedJapaneseQA")
select_dropdown_dataset = gr.State(dropdown_dataset_list.value[0])
select_dataset = gr.State(None)  # currently loaded dataset
select_dataset_total_len = gr.State(0)  # number of rows in the current dataset
select_idx = gr.State(0)  # current row index (may be unnecessary if a random mode is added?)
# random_mode = gr.State(False) # random mode

# In-memory table of the annotations made so far; save_annotation appends
# one row per submitted annotation.
annotated_dataset = gr.State(
    pd.DataFrame({
        'dataset_name': [], # source dataset name
        'dataset_id': [], # row index within the source dataset
        'who': [], # annotator name
        'unknown_quality': [],  # simple mode: "don't know"
        'good': [], # simple mode: good
        'bad': [], # simple mode: bad
        'is_proofreading_1': [], # whether the answer text was edited
        'answer_text_1': [],
        'is_proofreading_2': [],  # for the second turn
        'answer_text_2': [],  # for the second turn
        
        # 5-point ratings
        'score': [], # overall (5-point)
        'helpfulness': [], # helpfulness (5-point)
        'correctness': [], # correctness (5-point)
        'coherence': [], # coherence (5-point)
        'complexity': [], # complexity (5-point)
        'verbosity': [], # verbosity (5-point)
        'humor': [], # humor (SteerLM)
        'creativity': [], # creativity (SteerLM)
        'appropriate': [], # appropriateness (SteerLM uses not_appropriate)
        'following_instructions': [], # whether given instructions were followed (SteerLM fails_task)
        'politeness': [], # politeness (minnadechat)
        'harmfulness': [], # harmfulness (minnadechat)

        # 0: unknown (unset) 1: absent 2: present
        # NOTE(review): checkbox_to_int in this file maps "有" (present) to 1
        # and "無" (absent) to 2, i.e. the opposite of the legend above --
        # confirm which encoding is intended.
        'typos': [], # typos / omissions (minnadechat)
        'hate': [], # hate
        'sexual': [], # sexual content
        'violence': [], # violence
        'suicide': [], # suicide
        'threat': [], # crime
        'gun': [], # heavy firearms
        'controlled_substance': [], # controlled substances
        'criminal_planing': [], # criminal planning
        'privacy': [], # personal information
        'harassment': [], # harassment
        'profanity': [], # profanity
        'political_content': [], # political content (SteerLM)
        'moral_judgement': [], # immoral content (SteerLM)
    })
)

is_dataset_loaded = gr.State(False) # whether a dataset has been loaded

you_dataset_id = gr.State(0)  # ID of the data the annotator is working on
dataset_name = gr.State("")  # name of the dataset used for editing
dataset_id = gr.State(0)  # row index in the source dataset
who = gr.State("")  # annotator name

# Simple mode
unknown_quality = gr.State(False)  # "don't know" (unset)
good = gr.State(False)  # good
bad = gr.State(False)  # bad

# Answer texts as originally displayed; compared against the submitted text
# to detect whether the annotator edited the answer.
initial_answer_text_1 = gr.State("")  # baseline for answer 1
initial_answer_text_2 = gr.State("")  # baseline for answer 2

is_proofreading_1 = gr.State(False)  # whether answer 1 was edited
answer_text_1 = gr.State("")  # edited text of answer 1
is_proofreading_2 = gr.State(False)  # whether answer 2 was edited
answer_text_2 = gr.State("")  # edited text of answer 2

# (5-point ratings)
# 0 is used as the "not judged / hard to judge" value
score = gr.State(3)  # overall score, initial value 3 (the one mandatory rating)
helpfulness = gr.State(0) # helpfulness
correctness = gr.State(0) # correctness
coherence = gr.State(0) # coherence
complexity = gr.State(0) # complexity
verbosity = gr.State(0) # verbosity
humor = gr.State(0) # humor (SteerLM)
creativity = gr.State(0) # creativity (SteerLM)
appropriate = gr.State(0) # appropriateness (SteerLM uses not_appropriate)
following_instructions = gr.State(0) # instruction following (SteerLM fails_task)
politeness = gr.State(0) # politeness (minnadechat)
harmfulness = gr.State(0) # harmfulness (minnadechat)

# (presence/absence ratings)
# 0 is used as the "not judged / hard to judge" value
hate = gr.State(0) # hate
sexual = gr.State(0) # sexual content
violence = gr.State(0) # violence
suicide = gr.State(0) # suicide
threat = gr.State(0) # crime
gun = gr.State(0) # guns / heavy firearms
controlled_substance = gr.State(0) # controlled substances
criminal_planing = gr.State(0) # criminal planning
privacy = gr.State(0) # personal / private information
harassment = gr.State(0) # harassment
profanity = gr.State(0) # profanity
political_content = gr.State(0) # political content (SteerLM)
moral_judgement = gr.State(0) # immoral content (SteerLM)
typos = gr.State(0) # typos / omissions (minnadechat)

# 未整理　========================================

# Load the dataset currently selected in the dropdown
# The return annotation expands to Tuple[str x 6, gr.update x 39].
def dataset_load_fn() -> Tuple[(str,) * 6 + (gr.update,) * 39]:
    """Load the selected dataset, shuffle it and return the first row for display.

    The "train" split is converted to a pandas DataFrame, the original row
    number is preserved in an ``index`` column, the rows are shuffled, and
    the shuffled frame is stored back under ``select_dataset.value["train"]``.
    The row cursor, total length, load flag and the baseline answer texts
    are (re)initialised along the way.

    Returns:
        A 45-element tuple: the first row's question and answer repeated
        three times (question, answer, question, answer, question, answer),
        followed by 39 ``gr.update(interactive=True)`` objects.
    """
    is_dataset_loaded.value = False  # reset the load flag while loading

    select_dataset.value = load_dataset(select_dropdown_dataset.value)

    # Dataset -> DataFrame, keep the original position as an "index" column,
    # then shuffle and renumber.
    shuffled = (
        select_dataset.value["train"]
        .to_pandas()
        .reset_index(drop=False)
        .sample(frac=1)
        .reset_index(drop=True)
    )
    select_dataset.value["train"] = shuffled

    select_idx.value = 0
    select_dataset_total_len.value = len(shuffled)
    is_dataset_loaded.value = True  # loading finished

    # Baseline texts used later to detect whether the answer was edited.
    first_row = shuffled.iloc[select_idx.value]
    first_answer = first_row["answer"]
    initial_answer_text_1.value = first_answer
    initial_answer_text_2.value = first_answer
    first_question = first_row["question"]

    # One fresh update object per output component.
    enable_updates = [gr.update(interactive=True) for _ in range(39)]

    return (
        first_question, first_answer,
        first_question, first_answer,
        first_question, first_answer,
        *enable_updates,
    )


# データの保存処理　========================================

# On a Space, records end up in the CommitScheduler `path_in_repo` folder.
# (In local development a json file is created under ./user_annotation)
# The file name carries a fresh UUID generated when this module is imported.
annotation_file = Path("user_annotation/") / f"data_{uuid.uuid4()}.json"
annotated_folder = annotation_file.parent

scheduler = CommitScheduler(
    repo_id=OUTPUT_DATASET[1],  # second list entry, i.e. the output-test repository
    repo_type="dataset",
    folder_path=annotated_folder,
    path_in_repo="data",  # destination folder inside the repository
    private=True,
    every=5,  # upload every 5 minutes (noted by the author as the minimum recommended by the Hugging Face docs)
)

# チェックボックスをintに変換
def checkbox_to_int(checkbox_value) -> int:
    if checkbox_value == "不明":
        output = 0
    elif checkbox_value == "有":
        output = 1
    elif checkbox_value == "無":
        output = 2
    else :
        output = 0
        print("error: ")
    return output

# Record one annotation (in-memory table + JSON Lines file picked up by CommitScheduler)
def save_annotation(
        dataset_name: str,
        dataset_id: int,
        who: str,
        unknown_quality: bool,
        good: bool,
        bad: bool,
        is_proofreading_1: bool,
        answer_text_1: str,
        is_proofreading_2: bool,
        answer_text_2: str,
        score: int,
        helpfulness: int,
        correctness: int,
        coherence: int,
        complexity: int,
        verbosity: int,
        humor: int,
        creativity: int,
        appropriate: int,
        following_instructions: int,
        politeness: int,
        harmfulness: int,
        hate: int,
        sexual: int,
        violence: int,
        suicide: int,
        threat: int,
        gun: int,
        controlled_substance: int,
        criminal_planing: int,
        privacy: int,
        harassment: int,
        profanity: int,
        political_content: int,
        moral_judgement: int,
        typos: int,
        ) -> None:
    """Append one annotation to ``annotated_dataset`` and to the upload file.

    The same record is written to two places:

    * a new row in the ``annotated_dataset`` DataFrame (raw values), and
    * one JSON line appended to ``annotation_file`` while holding
      ``scheduler.lock``; that line additionally carries a ``datetime``
      field and has ``dataset_id`` and all rating fields coerced to ``int``.

    Args:
        dataset_name: Name of the dataset the annotated row comes from.
        dataset_id: Row index in the source dataset.
        who: Annotator name.
        unknown_quality, good, bad: Simple-mode verdict flags.
        is_proofreading_1, is_proofreading_2: Whether answer 1 / 2 was edited.
        answer_text_1, answer_text_2: Edited answer texts.
        score ... harmfulness: 5-point ratings.
        hate ... typos: Presence/absence codes (see ``checkbox_to_int``).

    Raises:
        TypeError, ValueError: If ``dataset_id`` or a rating cannot be
            converted with ``int()``.
    """
    # Both outputs are derived from these two mappings so their field names
    # cannot drift apart (the table row used to lack 'coherence' and stored
    # typos under the misspelled key 'types').
    descriptive = {
        'dataset_name': dataset_name,
        'dataset_id': dataset_id,
        'who': who,
        'unknown_quality': unknown_quality,
        'good': good,
        'bad': bad,
        'is_proofreading_1': is_proofreading_1,
        'answer_text_1': answer_text_1,
        'is_proofreading_2': is_proofreading_2,
        'answer_text_2': answer_text_2,
    }
    ratings = {
        'score': score,
        'helpfulness': helpfulness,
        'correctness': correctness,
        'coherence': coherence,
        'complexity': complexity,
        'verbosity': verbosity,
        'humor': humor,
        'creativity': creativity,
        'appropriate': appropriate,
        'following_instructions': following_instructions,
        'politeness': politeness,
        'harmfulness': harmfulness,
        'hate': hate,
        'sexual': sexual,
        'violence': violence,
        'suicide': suicide,
        'threat': threat,
        'gun': gun,
        'controlled_substance': controlled_substance,
        'criminal_planing': criminal_planing,
        'privacy': privacy,
        'harassment': harassment,
        'profanity': profanity,
        'political_content': political_content,
        'moral_judgement': moral_judgement,
        'typos': typos,
    }

    # In-memory table: one-row frame appended to the existing rows.
    # ignore_index=True already renumbers the rows 0..n-1.
    new_row = pd.DataFrame(
        {column: [value] for column, value in {**descriptive, **ratings}.items()}
    )
    annotated_dataset.value = pd.concat(
        [annotated_dataset.value, new_row],
        ignore_index=True,
    )

    # JSON line: same field order as before (datetime first). Values coming
    # out of a DataFrame may be numpy integers, which json cannot serialise,
    # hence the int() coercion.
    data_to_write = {
        # "id": , can CommitScheduler not fetch the last id and assign the next one?
        "datetime": datetime.datetime.now().isoformat(),
        **descriptive,
        "dataset_id": int(dataset_id),
        **{field: int(value) for field, value in ratings.items()},
    }

    # Hold the scheduler lock so an upload never sees a half-written line.
    with scheduler.lock:
        with annotation_file.open("a", encoding='utf-8') as f:
            f.write(json.dumps(data_to_write, ensure_ascii=False))
            f.write("\n")

# アノテーションの追加処理　========================================


# UI処理 ========================================

# Show the user name
def hello(profile: gr.OAuthProfile | None) -> Tuple[str, str]:
    """Build the greeting for the logged-in user and remember the user name.

    Args:
        profile: OAuth profile of the visitor, or ``None`` when not logged in.

    Returns:
        ``(message, who.value)``. When a profile is given, ``who.value`` is
        first set to ``profile.username`` and the message greets that user;
        otherwise the message asks the visitor to log in and ``who.value``
        is returned unchanged.
    """
    if profile is not None:
        who.value = profile.username
        message = f'{profile.username} さん、よろしくお願いいたします。'
    else:
        message = "プライベートデータセット取得のためにログインしてください。"
    return message, who.value


# Gradio theme object passed to gr.Blocks
theme_ = gr.themes.Default()

# Hook for later CSS design changes
def load_css():
    """Return the contents of ``style.css`` from the current working directory.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    Returns:
        The stylesheet text.

    Raises:
        FileNotFoundError: If ``style.css`` does not exist.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    return Path("style.css").read_text(encoding="utf-8")

# Gradio 画面 ============================================
with gr.Blocks(theme=theme_, css=load_css()) as demo:

    gr.Markdown("# データセット アノテーション for Tanuki (Phase2)")
    
    # Header row: description text on the left, annotator-name input on the right.
    with gr.Row():
        # The string below uses backslash line continuations, so the leading
        # spaces of the continued lines are part of the Markdown text.
        gr.Markdown("### GENIACにて開発中のLLM用データセットに対してアノテーションするSpaceです\n \
                    入力されたデータは使用される可能性があるため、個人情報・秘匿情報などは入力しないでください。\n \
                    テスト中です。")
        # Single-line text area for the annotator name, pre-filled from `who`.
        gr_who = gr.TextArea(value=who.value, lines=1, max_lines=1, label="ユーザー名 (入力してください)")
        
        def change_name(name: str) -> None:
            """Store the typed annotator name in the `who` state."""
            who.value = name
        
        # Keep `who` in sync with every edit of the text area; no UI outputs.
        gr_who.change(
            change_name,
            inputs=[gr_who],
            outputs=[]
        )

    with gr.Tab("アノテーション (シングルターン)"):

        # HF login 機能
        # with gr.Row(equal_height=True):
            # gr.LoginButton(value="HuggingFace ログイン",
            #                logout_value="HuggingFace ログアウト", scale=1)
            # ユーザー名
            # gr_profile_name = gr.Markdown()
            # demo.load(hello, inputs=None, outputs=[gr_profile_name, who])

        # Row with step 1 (dataset dropdown) and step 2 (load button).
        with gr.Row():

            def dropdown_select(select_value) -> None:
                """Remember the dataset name chosen in the dropdown."""
                select_dropdown_dataset.value = select_value

            # Choice of the dataset to annotate
            gr_dropdown_dataset = gr.Dropdown(
                label="データセット選択 ①",
                choices=dropdown_dataset_list.value,
                value=select_dropdown_dataset.value,
                elem_id="dataset_sel",
                scale=2)

            # Update the selected dataset name whenever the dropdown changes.
            gr_dropdown_dataset.change(
                dropdown_select,
                inputs=[gr_dropdown_dataset]
            )

            # Load button; its click handler is not wired in this part of the file.
            gr_data_load_btn = gr.Button("② データセットを読み込む")

        with gr.Column() as content_column:

            # Tab 1: simple good / don't know / bad verdict.
            # Every component starts non-interactive; presumably they are
            # enabled by the updates returned from dataset_load_fn -- the
            # wiring is not visible in this part of the file.
            with gr.Tab("③ シンプル(良・悪)"):
                with gr.Column():
                    with gr.Row(equal_height=True):
                        gr_good_btn = gr.Button("良い", interactive=False)
                        gr_unknown_btn = gr.Button(
                            "分から���い", interactive=False)  # "don't know" button
                        gr_bad_btn = gr.Button("悪い", interactive=False)

                # Question shown for the current row
                gr_question_text_1_1 = gr.Textbox(
                    label="質問: ", lines=5, interactive=False)

                # Answer for the current row; the label invites the annotator to correct it
                gr_answer_text_1_1 = gr.Textbox(
                    label="回答: 訂正して頂けると、品質が上がります。",
                    lines=20,
                    interactive=False)

            # Tab 2: simple 5-point rating, one button per score (5 = high
            # quality ... 1 = low quality). All components start non-interactive.
            with gr.Tab("③ ５段階評価(シンプル)"):

                with gr.Row() as simple_score_btn:
                    gr_score_5_btn = gr.Button("5: 高品質", interactive=False)
                    gr_score_4_btn = gr.Button("4: 良い", interactive=False)
                    gr_score_3_btn = gr.Button("3: 普通", interactive=False)
                    gr_score_2_btn = gr.Button("2: 悪い", interactive=False)
                    gr_score_1_btn = gr.Button("1: 低品質", interactive=False)
                    
                # Question shown for the current row
                gr_question_text_2_1 = gr.Textbox(
                    label="質問: ", lines=5, interactive=False)

                # Answer for the current row; the label invites the annotator to correct it
                gr_answer_text_2_1 = gr.Textbox(
                    label="回答: 訂正して頂けると、品質が上がります。", lines=20, interactive=False)
                
            # Tab 3: detailed evaluation. Left column shows question/answer,
            # right column holds the overall score, the optional 0-5 sliders
            # and the presence/absence radio groups. All components start
            # non-interactive.
            with gr.Tab("③ ５段階評価 (詳細)"):
                
                with gr.Row():
                    
                    # Left column: question and (editable once enabled) answer
                    with gr.Column() as EvalFive:

                        gr_question_text_3_1 = gr.Textbox(
                            label="質問: ", lines=10, interactive=False)
                        
                        # gr_explain = gr.Markdown("回答")
                        
                        gr_answer_text_3_1 = gr.Textbox(
                            label="回答: 訂正して頂けると、品質が上がります。", lines=35, interactive=False)
                        
                    # Right column: submit/reset buttons and all rating inputs
                    with gr.Column() as EvalFiveDetail:
                        
                        with gr.Row():
                            gr_submit_score = gr.Button("評価送信", interactive=False)
                            gr_score_reset = gr.Button("スコアリセット", interactive=False)
                            
                        # Overall score (label marks it as required): 1-5, default 3
                        gr_score_detail = gr.Slider(label="総合スコア　【必須】",value=3, minimum=1, maximum=5, step=1,  interactive=False)
                        gr_eval_annotation_explain = gr.Markdown("詳細アノテーション (5点満点)")
                       
                        # Optional detail sliders: 0-5 with default 0
                        with gr.Row() :
                            gr_helpfulness = gr.Slider(label="有用性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_correctness = gr.Slider(label="正確さ", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_coherence = gr.Slider(label="一貫性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_complexity = gr.Slider(label="複雑さ", minimum=0, value=0, maximum=5, step=1, interactive=False)
                        
                        with gr.Row() :
                            gr_verbosity = gr.Slider(label="冗長性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_humor = gr.Slider(label="ユーモア", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_creativity = gr.Slider(label="創造性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_appropriate = gr.Slider(label="適切性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            
                        with gr.Row() :
                            gr_following_instructions = gr.Slider(label="忠実性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_politeness = gr.Slider(label="礼儀正しさ", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            gr_harmfulness = gr.Slider(label="有害性", minimum=0, value=0, maximum=5, step=1, interactive=False)
                            # Hint text: only score what can be judged
                            gr_text_4 = gr.Markdown("判断可能な物のみ\n\nスコアを付けてください。")
                            
                        # Empty Markdown placed between the sliders and the radio groups
                        gr_text_5 = gr.Markdown("")

                        # Presence/absence radios; choices are "不明" (unknown),
                        # "有" (present), "無" (absent), default "不明".
                        with gr.Row():
                            gr_typos = gr.Radio(label="誤字・脱字", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_moral_judgement = gr.Radio(label="非道徳", choices=["不明", "有", "無"], value="不明", interactive=False)

                        with gr.Row():
                            gr_hate = gr.Radio(label="ヘイト", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_sexual = gr.Radio(label="性的内容", choices=["不明", "有", "無"], value="不明", interactive=False)
                            
                        with gr.Row():
                            gr_violence = gr.Radio(label="暴力的", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_suicide = gr.Radio(label="自殺行為", choices=["不明", "有", "無"], value="不明", interactive=False)
                            
                        with gr.Row():
                            gr_threat = gr.Radio(label="犯罪", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_gun = gr.Radio(label="銃等", choices=["不明", "有", "無"], value="不明", interactive=False)
                            
                        with gr.Row():
                            gr_controlled_substance = gr.Radio(label="規制対象物質", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_criminal_planing = gr.Radio(label="犯罪計画", choices=["不明", "有", "無"], value="不明", interactive=False)
                            
                        with gr.Row():
                            gr_privacy = gr.Radio(label="個人情報", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_harassment = gr.Radio(label="ハラスメント", choices=["不明", "有", "無"], value="不明", interactive=False)
                            
                        with gr.Row():
                            gr_profanity = gr.Radio(label="冒涜行為", choices=["不明", "有", "無"], value="不明", interactive=False)
                            gr_political_content = gr.Radio(label="政治的内容", choices=["不明", "有", "無"], value="不明", interactive=False)
                        

            # Submit handler of the detailed 5-point tab: copy the inputs into
            # the gr.State objects, save the record, then show the next row
            def eval_submit(
                # text
                g_answer_text_3_1: str,
                
                # sliders
                g_score: int,
                g_helpfulness: int,
                g_correctness: int,
                g_coherence: int,
                g_complexity: int,
                g_verbosity: int,
                g_humor: int,
                g_creativity: int,
                g_appropriate: int,
                g_following_instructions: int,
                g_politeness: int,
                g_harmfulness: int,

                # radio buttons ("不明" / "有" / "無")
                g_hate: str,
                g_sexual: str,
                g_violence: str,
                g_suicide: str,
                g_threat: str,
                g_gun: str,
                g_controlled_substance: str,
                g_criminal_planing: str,
                g_privacy: str,
                g_harassment: str,
                g_profanity: str,
                g_political_content: str,
                g_moral_judgement: str,
                g_typos: str,
            ):
                """Save the detailed evaluation of the displayed row and advance.

                The parameters arrive positionally from the ``inputs`` list of
                ``gr_submit_score.click`` and must stay in that order.

                Args:
                    g_answer_text_3_1: Answer text as currently shown/edited.
                    g_score ... g_harmfulness: Slider values.
                    g_hate ... g_typos: Radio labels, converted with
                        ``checkbox_to_int``.

                Returns:
                    A tuple of ``gr.update`` objects: question/answer of the
                    next row three times (one pair per tab), followed by the
                    values returned by ``score_reset_display()``.
                """
                # A detailed evaluation is never a simple-mode verdict.
                good.value = False
                bad.value = False
                unknown_quality.value = False
                
                # Record the text only when the annotator actually edited it.
                if initial_answer_text_1.value != g_answer_text_3_1:
                    is_proofreading_1.value = True
                    answer_text_1.value = g_answer_text_3_1
                else:
                    is_proofreading_1.value = False
                    answer_text_1.value = ""
                
                # Slider values are stored as they are.
                score.value = g_score
                helpfulness.value = g_helpfulness
                correctness.value = g_correctness
                coherence.value = g_coherence
                complexity.value = g_complexity
                verbosity.value = g_verbosity
                humor.value = g_humor
                creativity.value = g_creativity
                appropriate.value = g_appropriate
                following_instructions.value = g_following_instructions
                politeness.value = g_politeness
                harmfulness.value = g_harmfulness
                
                # Radio labels are stored as integer codes.
                for state, label in (
                    (hate, g_hate),
                    (sexual, g_sexual),
                    (violence, g_violence),
                    (suicide, g_suicide),
                    (threat, g_threat),
                    (gun, g_gun),
                    (controlled_substance, g_controlled_substance),
                    (criminal_planing, g_criminal_planing),
                    (privacy, g_privacy),
                    (harassment, g_harassment),
                    (profanity, g_profanity),
                    (political_content, g_political_content),
                    (moral_judgement, g_moral_judgement),
                    (typos, g_typos),
                ):
                    state.value = checkbox_to_int(label)
                
                df = select_dataset.value["train"]

                # Save BEFORE moving the cursor: select_idx still points at the
                # row that was just rated, so its original index ('index'
                # column) is what gets recorded as dataset_id.
                save_annotation(
                    select_dropdown_dataset.value,
                    df.iloc[select_idx.value]['index'],
                    who.value,
                    unknown_quality.value,
                    good.value,
                    bad.value,
                    is_proofreading_1.value,
                    answer_text_1.value,
                    is_proofreading_2.value,
                    answer_text_2.value,
                    score.value,
                    helpfulness.value,
                    correctness.value,
                    coherence.value,
                    complexity.value,
                    verbosity.value,
                    humor.value,
                    creativity.value,
                    appropriate.value,
                    following_instructions.value,
                    politeness.value,
                    harmfulness.value,
                    hate.value,
                    sexual.value,
                    violence.value,
                    suicide.value,
                    threat.value,
                    gun.value,
                    controlled_substance.value,
                    criminal_planing.value,
                    privacy.value,
                    harassment.value,
                    profanity.value,
                    political_content.value,
                    moral_judgement.value,
                    typos.value,
                )

                # Advance to the next row, wrapping around at the end.
                select_idx.value += 1
                if select_idx.value >= len(df):
                    select_idx.value = 0

                # Prepare the state for the row that is about to be shown.
                initialize_next_data(df)
                
                # Reset the rating widgets (called once; the result is returned).
                reset_values = score_reset_display()
                
                # Values for the UI: the same question/answer for all three tabs.
                next_row = df.iloc[select_idx.value]
                return (
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    *reset_values,
                )
            
            # Wire the "submit evaluation" button to eval_submit.
            gr_submit_score.click(
                eval_submit,
                # Passed positionally: the order must match eval_submit's
                # parameters (answer text, sliders, then radio groups).
                inputs=[
                    gr_answer_text_3_1,
                    gr_score_detail,
                    gr_helpfulness,
                    gr_correctness,
                    gr_coherence,
                    gr_complexity,
                    gr_verbosity,
                    gr_humor,
                    gr_creativity,
                    gr_appropriate,
                    gr_following_instructions,
                    gr_politeness,
                    gr_harmfulness,
                    gr_hate,
                    gr_sexual,
                    gr_violence,
                    gr_suicide,
                    gr_threat,
                    gr_gun,
                    gr_controlled_substance,
                    gr_criminal_planing,
                    gr_privacy,
                    gr_harassment,
                    gr_profanity,
                    gr_political_content,
                    gr_moral_judgement,
                    gr_typos
                    ],
                # UI updates: the first six entries receive the question/answer
                # pairs returned by eval_submit; the remaining entries receive
                # the values from score_reset_display() -- presumably in this
                # same order; that function is not visible here, confirm.
                outputs=[
                    gr_question_text_1_1,
                    gr_answer_text_1_1,
                    gr_question_text_2_1,
                    gr_answer_text_2_1,
                    gr_question_text_3_1,
                    gr_answer_text_3_1,
                    gr_score_detail,
                    gr_helpfulness,
                    gr_correctness,
                    gr_coherence,
                    gr_complexity,
                    gr_verbosity,
                    gr_humor,
                    gr_creativity,
                    gr_appropriate,
                    gr_following_instructions,
                    gr_politeness,
                    gr_harmfulness,
                    gr_hate,
                    gr_sexual,
                    gr_violence,
                    gr_suicide,
                    gr_threat,
                    gr_gun,
                    gr_controlled_substance,
                    gr_criminal_planing,
                    gr_privacy,
                    gr_harassment,
                    gr_profanity,
                    gr_political_content,
                    gr_moral_judgement,
                    gr_typos
                ],
            )

            # Click handler body for the 5-point score buttons
            def score_button_clicked(button_value):
                """Store the clicked score and clear the simple-mode verdict flags."""
                score.value = button_value
                # A 5-point score replaces any good / bad / "don't know" verdict.
                for verdict_flag in (good, bad, unknown_quality):
                    verdict_flag.value = False

            # Shared handler for the simple verdict buttons and the 5-point score buttons
            def update_annotation(
                input_ans_1: str | None = None,
                input_ans_2: str | None = None,
                is_good: bool | None = None,  # good/bad flag
                is_unknown: bool | None = None,  # "don't know" flag
                is_simple: bool | None = None,
                score_value: int | None = None  # 5-point score; None for good/bad verdicts
            ) -> tuple:
                """Record the verdict for the displayed row and advance to the next row.

                Args:
                    input_ans_1: Answer text of turn 1 as currently shown/edited.
                    input_ans_2: Answer text of turn 2 as currently shown/edited.
                    is_good: True for "good", False for "bad" (simple mode).
                    is_unknown: True when "don't know" was chosen (simple mode).
                    is_simple: True when called from simple mode; the stored
                        score is then forced to 0.
                    score_value: Score of the clicked 5-point button, or None.

                Returns:
                    A tuple of ``gr.update`` objects: question/answer of the
                    next row three times (one pair per tab), followed by the
                    values returned by ``score_reset_display()``.
                """
                # Update good/bad/unknown and score state
                update_evaluation_state(is_good, is_unknown, score_value)

                # Detect edits of the answer texts
                update_answer_state(input_ans_1, input_ans_2)

                # A simple-mode click carries no score.
                if is_simple:
                    score.value = 0

                df = select_dataset.value["train"]

                # Save BEFORE moving the cursor: save_annotation_data reads
                # select_idx to look up the original index of the row, and it
                # must still point at the row that was just judged.
                save_annotation_data(df)

                # Advance to the next row, wrapping around at the end.
                select_idx.value += 1
                if select_idx.value >= len(df):
                    select_idx.value = 0

                # Prepare the state for the row that is about to be shown.
                initialize_next_data(df)
                
                # Reset the rating widgets
                reset_values = score_reset_display()

                next_row = df.iloc[select_idx.value]
                return (
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    gr.update(value=next_row["question"]),
                    gr.update(value=next_row["answer"]),
                    *reset_values,
                )


            # Update the evaluation state (good / bad / unknown flags and score)
            def update_evaluation_state(is_good, is_unknown, score_value):
                """Write the verdict flags (and, for 5-level scoring, the score) into state.

                Args:
                    is_good: Good/bad verdict; only used when ``score_value`` is None.
                    is_unknown: "Cannot evaluate" flag; only used when ``score_value`` is None.
                    score_value: 5-level score, or None for a good/bad/unknown verdict.
                """
                if score_value is not None:
                    # 5-level scoring: the simple verdict flags are all cleared
                    for verdict_flag in (good, bad, unknown_quality):
                        verdict_flag.value = False
                    score.value = score_value
                    return

                # good / bad / unknown verdict: "bad" is never set for an unknown verdict
                good.value = is_good
                bad.value = False if is_unknown else not is_good
                unknown_quality.value = is_unknown

            # 変更を検知して値を設定
            def update_answer_state(input_ans_1, input_ans_2):
                """Record edited answer texts and flag them as proofread.

                For each turn, a non-None input that differs from the stored initial
                answer sets the proofreading flag and stores the new text; otherwise
                the stored answer text becomes an empty string.

                Args:
                    input_ans_1: Current answer text for turn 1 (may be None).
                    input_ans_2: Current answer text for turn 2 (may be None).
                """
                # Turn 1
                edited_1 = input_ans_1 is not None and initial_answer_text_1.value != input_ans_1
                if edited_1:
                    is_proofreading_1.value = True
                answer_text_1.value = input_ans_1 if edited_1 else ""

                # Turn 2
                edited_2 = input_ans_2 is not None and initial_answer_text_2.value != input_ans_2
                if edited_2:
                    is_proofreading_2.value = True
                answer_text_2.value = input_ans_2 if edited_2 else ""

            # データ保存
            def save_annotation_data(df):
                """Pass the current annotation state to ``save_annotation``.

                Args:
                    df: Table of the selected dataset; the row at ``select_idx.value``
                        supplies the original ``'index'`` used as the dataset id.
                """
                current_row = df.iloc[select_idx.value]

                # States whose values are forwarded positionally, in the exact
                # argument order expected after the dataset name and dataset id.
                forwarded_states = (
                    who,
                    unknown_quality, good, bad,
                    is_proofreading_1, answer_text_1,
                    is_proofreading_2, answer_text_2,
                    score,
                    helpfulness, correctness, coherence, complexity, verbosity,
                    humor, creativity, appropriate, following_instructions,
                    politeness, harmfulness,
                    hate, sexual, violence, suicide, threat, gun,
                    controlled_substance, criminal_planing, privacy, harassment,
                    profanity, political_content, moral_judgement, typos,
                )

                save_annotation(
                    select_dropdown_dataset.value,
                    # dataset id is the row's original index number
                    current_row['index'],
                    *(state.value for state in forwarded_states),
                )

            # 次データ読込時の初期化
            def initialize_next_data(df):
                """Reset per-row state for the row at ``select_idx.value``.

                Clears both proofreading flags and stores that row's ``"answer"`` as the
                initial answer text for both turns.

                Args:
                    df: Table of the selected dataset.
                """
                for proofreading_flag in (is_proofreading_1, is_proofreading_2):
                    proofreading_flag.value = False

                # Both turns start from the same original answer text
                original_answer = df.iloc[select_idx.value]["answer"]
                initial_answer_text_1.value = original_answer
                initial_answer_text_2.value = original_answer
                
            # 評価不可
            def unknown_click(input_ans_1, input_ans_2):
                """Handle the "cannot evaluate" button.

                Clears the good/bad flags, resets the score states, then records the
                row as unknown in simple mode.

                Args:
                    input_ans_1: Current answer text for turn 1.
                    input_ans_2: Current answer text for turn 2.

                Returns:
                    Whatever ``update_annotation`` returns.
                """
                for verdict_flag in (good, bad):
                    verdict_flag.value = False
                score_reset_display()

                verdict = {"is_good": False, "is_unknown": True, "is_simple": True}
                return update_annotation(
                    input_ans_1=input_ans_1,
                    input_ans_2=input_ans_2,
                    **verdict,
                )

            # 評価不可クリックイベント
            gr_unknown_btn.click(
                fn=unknown_click,
                inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
                outputs=[
                    # question / answer text pairs
                    gr_question_text_1_1, gr_answer_text_1_1,
                    gr_question_text_2_1, gr_answer_text_2_1,
                    gr_question_text_3_1, gr_answer_text_3_1,
                    # overall score followed by the detail components
                    gr_score_detail,
                    gr_helpfulness, gr_correctness, gr_coherence,
                    gr_complexity, gr_verbosity, gr_humor,
                    gr_creativity, gr_appropriate, gr_following_instructions,
                    gr_politeness, gr_harmfulness,
                    gr_hate, gr_sexual, gr_violence, gr_suicide,
                    gr_threat, gr_gun, gr_controlled_substance,
                    gr_criminal_planing, gr_privacy, gr_harassment,
                    gr_profanity, gr_political_content, gr_moral_judgement,
                    gr_typos,
                ],
            )

            # 良いクリック
            def good_click(input_ans_1, input_ans_2):
                """Handle the "good" button: reset score states, then record a good verdict.

                Args:
                    input_ans_1: Current answer text for turn 1.
                    input_ans_2: Current answer text for turn 2.

                Returns:
                    Whatever ``update_annotation`` returns.
                """
                score_reset_display()

                verdict = {"is_good": True, "is_unknown": False, "is_simple": True}
                return update_annotation(
                    input_ans_1=input_ans_1,
                    input_ans_2=input_ans_2,
                    **verdict,
                )

            # Good-button click event registration
            gr_good_btn.click(
                fn=good_click,
                inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
                outputs=[
                    # question / answer text pairs
                    gr_question_text_1_1, gr_answer_text_1_1,
                    gr_question_text_2_1, gr_answer_text_2_1,
                    gr_question_text_3_1, gr_answer_text_3_1,
                    # overall score followed by the detail components
                    gr_score_detail,
                    gr_helpfulness, gr_correctness, gr_coherence,
                    gr_complexity, gr_verbosity, gr_humor,
                    gr_creativity, gr_appropriate, gr_following_instructions,
                    gr_politeness, gr_harmfulness,
                    gr_hate, gr_sexual, gr_violence, gr_suicide,
                    gr_threat, gr_gun, gr_controlled_substance,
                    gr_criminal_planing, gr_privacy, gr_harassment,
                    gr_profanity, gr_political_content, gr_moral_judgement,
                    gr_typos,
                ],
            )

            # 低評価クリック
            def bad_click(input_ans_1, input_ans_2):
                """Handle the "bad" button: reset score states, then record a bad verdict.

                Args:
                    input_ans_1: Current answer text for turn 1.
                    input_ans_2: Current answer text for turn 2.

                Returns:
                    Whatever ``update_annotation`` returns.
                """
                score_reset_display()

                verdict = {"is_good": False, "is_unknown": False, "is_simple": True}
                return update_annotation(
                    input_ans_1=input_ans_1,
                    input_ans_2=input_ans_2,
                    **verdict,
                )

            # 低評価クリックイベント
            gr_bad_btn.click(
                fn=bad_click,
                inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
                outputs=[
                    # question / answer text pairs
                    gr_question_text_1_1, gr_answer_text_1_1,
                    gr_question_text_2_1, gr_answer_text_2_1,
                    gr_question_text_3_1, gr_answer_text_3_1,
                    # overall score followed by the detail components
                    gr_score_detail,
                    gr_helpfulness, gr_correctness, gr_coherence,
                    gr_complexity, gr_verbosity, gr_humor,
                    gr_creativity, gr_appropriate, gr_following_instructions,
                    gr_politeness, gr_harmfulness,
                    gr_hate, gr_sexual, gr_violence, gr_suicide,
                    gr_threat, gr_gun, gr_controlled_substance,
                    gr_criminal_planing, gr_privacy, gr_harassment,
                    gr_profanity, gr_political_content, gr_moral_judgement,
                    gr_typos,
                ],
            )

            # 5段階評価ボタンのクリックイベント
            # The five score buttons share the same input, the same outputs and the
            # same handler shape; only the recorded score differs. Register them in
            # a loop instead of five copy-pasted blocks.
            _score_click_outputs = [
                # question / answer text pairs
                gr_question_text_1_1,
                gr_answer_text_1_1,
                gr_question_text_2_1,
                gr_answer_text_2_1,
                gr_question_text_3_1,
                gr_answer_text_3_1,
                # overall score followed by the detail components
                gr_score_detail,
                gr_helpfulness,
                gr_correctness,
                gr_coherence,
                gr_complexity,
                gr_verbosity,
                gr_humor,
                gr_creativity,
                gr_appropriate,
                gr_following_instructions,
                gr_politeness,
                gr_harmfulness,
                gr_hate,
                gr_sexual,
                gr_violence,
                gr_suicide,
                gr_threat,
                gr_gun,
                gr_controlled_substance,
                gr_criminal_planing,
                gr_privacy,
                gr_harassment,
                gr_profanity,
                gr_political_content,
                gr_moral_judgement,
                gr_typos,
            ]

            def _make_score_handler(score_level):
                """Return a click handler that records ``score_level`` as the 5-level score.

                A factory is used so each handler captures its own score; a lambda
                created directly in the loop would late-bind the loop variable.
                """
                def _score_handler(answer_text):
                    # The text from gr_answer_text_2_1 is passed as input_ans_1
                    return update_annotation(
                        input_ans_1=answer_text,
                        is_unknown=False,
                        score_value=score_level,
                    )
                return _score_handler

            for _score_btn, _score_level in (
                (gr_score_1_btn, 1),
                (gr_score_2_btn, 2),
                (gr_score_3_btn, 3),
                (gr_score_4_btn, 4),
                (gr_score_5_btn, 5),
            ):
                _score_btn.click(
                    _make_score_handler(_score_level),
                    inputs=[gr_answer_text_2_1],
                    # Each registration gets its own list object
                    outputs=list(_score_click_outputs),
                )

            # Scoreリセット
            def score_reset_display():
                """Reset the score states and return the matching display defaults.

                Sets ``score.value`` to 3 and every detail state's value to 0.

                Returns:
                    A 26-element tuple: 3, eleven 0s, then fourteen "不明" strings.
                """
                score.value = 3

                detail_states = (
                    helpfulness, correctness, coherence, complexity, verbosity,
                    humor, creativity, appropriate, following_instructions,
                    politeness, harmfulness,
                    hate, sexual, violence, suicide, threat, gun,
                    controlled_substance, criminal_planing, privacy, harassment,
                    profanity, political_content, moral_judgement, typos,
                )
                for detail_state in detail_states:
                    detail_state.value = 0

                numeric_defaults = (3,) + (0,) * 11
                label_defaults = ("不明",) * 14
                return numeric_defaults + label_defaults
                            
            # Scoreリセット
            gr_score_reset.click(
                fn=score_reset_display,
                inputs=[],
                outputs=[
                    # overall score followed by the detail components, in the same
                    # order as the values returned by score_reset_display()
                    gr_score_detail,
                    gr_helpfulness, gr_correctness, gr_coherence,
                    gr_complexity, gr_verbosity, gr_humor,
                    gr_creativity, gr_appropriate, gr_following_instructions,
                    gr_politeness, gr_harmfulness,
                    gr_hate, gr_sexual, gr_violence, gr_suicide,
                    gr_threat, gr_gun, gr_controlled_substance,
                    gr_criminal_planing, gr_privacy, gr_harassment,
                    gr_profanity, gr_political_content, gr_moral_judgement,
                    gr_typos,
                ],
            )
            
            # データ読込
            gr_data_load_btn.click(
                fn=dataset_load_fn,
                inputs=None,
                # Sets the texts and switches the components to interactive (UI update)
                outputs=[
                    # question / answer text pairs
                    gr_question_text_1_1, gr_answer_text_1_1,
                    gr_question_text_2_1, gr_answer_text_2_1,
                    gr_question_text_3_1, gr_answer_text_3_1,
                    # the answer components are listed a second time on purpose;
                    # the order must keep matching dataset_load_fn's return values
                    gr_answer_text_1_1, gr_answer_text_2_1, gr_answer_text_3_1,
                    # evaluation buttons
                    gr_unknown_btn, gr_good_btn, gr_bad_btn,
                    gr_score_5_btn, gr_score_4_btn, gr_score_3_btn,
                    gr_score_2_btn, gr_score_1_btn,
                    gr_submit_score,
                    gr_score_detail,
                    gr_score_reset,
                    # detail components
                    gr_helpfulness, gr_correctness, gr_coherence,
                    gr_complexity, gr_verbosity, gr_humor,
                    gr_creativity, gr_appropriate, gr_following_instructions,
                    gr_politeness, gr_harmfulness,
                    gr_hate, gr_sexual, gr_violence, gr_suicide,
                    gr_threat, gr_gun, gr_controlled_substance,
                    gr_criminal_planing, gr_privacy, gr_harassment,
                    gr_profanity, gr_political_content, gr_moral_judgement,
                    gr_typos,
                ],
            )
            
            # TODO Tab切り替えで、アノテ済みの一覧を表示する
            # with gr.Tab("アノテ済みデータセット(管理画面)"):
            # タブを切り替えた時にデータ表示を更新する

# Launch the Gradio app only when this file is run as a script (not on import).
if __name__ == "__main__":
    demo.launch()