Spaces:

kevineen
/

tanuki_annotation_phase2

Running

App Files Files Community

tanuki_annotation_phase2 / app.py

kevineen

typo 修正

19ee3e1 7 days ago

raw history blame contribute delete

No virus

59.3 kB

	import json
	import datetime
	from pathlib import Path
	import uuid
	from typing import Tuple

	import pandas as pd

	import gradio as gr
	from datasets import load_dataset
	from huggingface_hub import CommitScheduler

	# Hugging Face dataset repositories that annotations are uploaded to
	# (kept as a list so the target can be switched for testing)
	OUTPUT_DATASET = [
	"team-hatakeyama-phase2/annotation_tanuki_phase2",
	"kevineen/Tanuki-Phase2-annotation-dataset", # output test
	]

	# Datasets that can be selected for annotation
	ANNOTATION_DATASET = [
	"hatakeyama-llm-team/AutoGeneratedJapaneseQA",
	"hatakeyama-llm-team/AutoGeneratedJapaneseQA-other",
	"kanhatakeyama/ChatbotArenaJaMixtral8x22b",
	"kanhatakeyama/OrcaJaMixtral8x22b",
	"kanhatakeyama/LogicalDatasetsByMixtral8x22b",

	# Data format not supported yet (support planned)
	# "hatakeyama-llm-team/WikiBookJa",
	# "kanhatakeyama/AutoWikiQA",
	# "susumuota/SyntheticTextWikiTranslate-askllm-v1", # Ask-LLM translation

	# Nemotron data (for future use)
	]

	MULTI_TURN_ANNOTATION_DATASET = [
	# Multi-turn annotation is not supported yet
	"kanhatakeyama/AutoMultiTurnByMixtral8x22b",
	]

	# Session State: (variables intended to be managed per browser session) ===========================
	# NOTE(review): these gr.State objects are created at module scope and the
	# handlers in this file read/write their `.value` directly instead of passing
	# them through event inputs/outputs. That looks like it makes the values
	# process-wide rather than per-session -- confirm against the Gradio State docs.

	# States for enabling/disabling UI parts
	is_selected_dataset = gr.State(False)
	is_loaded_dataset = gr.State(False)

	# List of datasets offered in the dropdown
	dropdown_dataset_list = gr.State(value=ANNOTATION_DATASET)

	# Currently targeted dataset name; the initial value is the first entry of
	# ANNOTATION_DATASET ("hatakeyama-llm-team/AutoGeneratedJapaneseQA")
	select_dropdown_dataset = gr.State(dropdown_dataset_list.value[0])
	select_dataset = gr.State(None) # currently loaded dataset
	select_dataset_total_len = gr.State(0) # length of the current dataset
	select_idx = gr.State(0) # current row index (maybe unnecessary if a random mode is added?)
	# random_mode = gr.State(False) # random mode

	# Annotations produced by the annotator (one row per submitted rating)
	annotated_dataset = gr.State(
	pd.DataFrame({
	'dataset_name': [], # source dataset name
	'dataset_id': [], # index within the source dataset
	'who': [], # annotator name
	'unknown_quality': [], # quality could not be judged
	'good': [], # good
	'bad': [], # bad
	'is_proofreading_1': [], # whether the answer text was edited
	'answer_text_1': [],
	'is_proofreading_2': [], # for the 2nd turn
	'answer_text_2': [], # for the 2nd turn

	# 5-point ratings
	'score': [], # overall (5-point)
	'helpfulness': [], # helpfulness (5-point)
	'correctness': [], # correctness (5-point)
	'coherence': [], # coherence (5-point)
	'complexity': [], # complexity (5-point)
	'verbosity': [], # verbosity (5-point)
	'humor': [], # humor (SteerLM)
	'creativity': [], # creativity (SteerLM)
	'appropriate': [], # appropriateness (SteerLM uses "not appropriate")
	'following_instructions': [], # whether instructions, if any, were followed (corresponds to SteerLM fails_task)
	'politeness': [], # politeness (minnadechat)
	'harmfulness': [], # harmfulness (minnadechat)

	# 0: unknown (unset) 1: absent 2: present
	# NOTE(review): checkbox_to_int in this file maps "有" (present) to 1 and
	# "無" (absent) to 2, the opposite of the line above -- confirm which is intended.
	'typos': [], # typos / omissions (minnadechat)
	'hate': [], # hate
	'sexual': [], # sexual content
	'violence': [], # violence
	'suicide': [], # suicide
	'threat': [], # crime
	'gun': [], # heavy firearms
	'controlled_substance': [], # controlled substances
	'criminal_planing': [], # criminal planning
	'privacy': [], # personal information
	'harassment': [], # harassment
	'profanity': [], # profanity
	'political_content': [], # political content (SteerLM)
	'moral_judgement': [], # immoral content (SteerLM)
	})
	)

	is_dataset_loaded = gr.State(False) # whether a dataset has been loaded

	you_dataset_id = gr.State(0) # ID of the item the annotator is working on
	dataset_name = gr.State("") # name of the dataset being edited
	dataset_id = gr.State(0) # index in the source dataset
	who = gr.State("") # annotator name

	# Simple mode
	unknown_quality = gr.State(False) # don't know (unset)
	good = gr.State(False) # good
	bad = gr.State(False) # bad

	initial_answer_text_1 = gr.State("") # original answer 1, used to detect edits
	initial_answer_text_2 = gr.State("") # original answer 2, used to detect edits

	is_proofreading_1 = gr.State(False) # whether answer 1 was edited
	answer_text_1 = gr.State("") # edited text of answer 1
	is_proofreading_2 = gr.State(False) # whether answer 2 was edited
	answer_text_2 = gr.State("") # edited text of answer 2

	# (5-point ratings)
	# 0 is used as the "not judged / hard to judge" value
	score = gr.State(3) # overall score, initial value 3 (the one required rating)
	helpfulness = gr.State(0) # helpfulness
	correctness = gr.State(0) # correctness
	coherence = gr.State(0) # coherence
	complexity = gr.State(0) # complexity
	verbosity = gr.State(0) # verbosity
	humor = gr.State(0) # humor (SteerLM)
	creativity = gr.State(0) # creativity (SteerLM)
	appropriate = gr.State(0) # appropriateness (not_appropriate in SteerLM)
	following_instructions = gr.State(0) # faithfulness to instructions (fails_task in SteerLM)
	politeness = gr.State(0) # politeness (minnadechat)
	harmfulness = gr.State(0) # harmfulness (minnadechat)

	# (presence / absence flags)
	# 0 is used as the "not judged / hard to judge" value
	hate = gr.State(0) # hate
	sexual = gr.State(0) # sexual content
	violence = gr.State(0) # violence
	suicide = gr.State(0) # suicide
	threat = gr.State(0) # crime
	gun = gr.State(0) # guns / heavy firearms
	controlled_substance = gr.State(0) # controlled substances
	criminal_planing = gr.State(0) # criminal planning
	privacy = gr.State(0) # personal / private information
	harassment = gr.State(0) # harassment
	profanity = gr.State(0) # profanity
	political_content = gr.State(0) # political content (SteerLM)
	moral_judgement = gr.State(0) # immoral content (SteerLM)
	typos = gr.State(0) # typos / omissions (minnadechat)

	# 未整理　========================================

	# Load the dataset chosen in the dropdown and enable the annotation widgets
	def dataset_load_fn() -> Tuple[
	    str, str, str, str, str, str,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	    gr.update, gr.update, gr.update,
	]:
	    """Load, shuffle and publish the dataset named by ``select_dropdown_dataset``.

	    The "train" split is converted to a pandas DataFrame, the original row
	    number is kept in an ``index`` column, the rows are shuffled, and the
	    shuffled frame is stored back under ``select_dataset.value["train"]``.

	    Returns:
	        A 45-tuple: the first row's question/answer repeated three times
	        (one pair per evaluation tab), followed by 39
	        ``gr.update(interactive=True)`` values.
	    """
	    is_dataset_loaded.value = False  # reset the load flag while loading

	    loaded = load_dataset(select_dropdown_dataset.value)
	    select_dataset.value = loaded

	    # Convert the Dataset object to a pandas DataFrame
	    frame = loaded["train"].to_pandas()

	    # Keep the original position in an "index" column, then shuffle
	    frame = frame.reset_index(drop=False)
	    frame = frame.sample(frac=1).reset_index(drop=True)
	    loaded["train"] = frame  # store the shuffled DataFrame

	    select_idx.value = 0
	    select_dataset_total_len.value = len(frame)
	    is_dataset_loaded.value = True

	    # Remember the untouched answer so later edits can be detected
	    first_row = frame.iloc[select_idx.value]
	    answer = first_row["answer"]
	    initial_answer_text_1.value = answer
	    initial_answer_text_2.value = answer
	    question = first_row["question"]

	    enable_widgets = tuple(gr.update(interactive=True) for _ in range(39))
	    return (question, answer, question, answer, question, answer, *enable_widgets)


	# Saving annotations ========================================

	# On a Space, the upload destination is the CommitScheduler path_in_repo folder
	# (in local development a json file is created under ./user_annotation)
	# Each process start gets its own file name via a random UUID.
	annotation_file = Path("user_annotation/") / f"data_{uuid.uuid4()}.json"
	annotated_folder = annotation_file.parent

	# Uploads the contents of annotated_folder to the second entry of
	# OUTPUT_DATASET (the one marked as the output-test repository).
	scheduler = CommitScheduler(
	repo_id=OUTPUT_DATASET[1],
	repo_type="dataset",
	folder_path=annotated_folder,
	path_in_repo="data", # destination folder when running on a Space
	private=True,
	every=5, # upload every 5 minutes, the minimum recommended by the Hugging Face docs
	)

	# Convert a radio-button label to its integer code
	def checkbox_to_int(checkbox_value) -> int:
	    """Map a presence/absence radio label to the integer stored in the dataset.

	    "不明" (unknown) -> 0, "有" (present) -> 1, "無" (absent) -> 2.
	    Any other value prints an error marker and is treated as 0.
	    """
	    for label, code in (("不明", 0), ("有", 1), ("無", 2)):
	        if checkbox_value == label:
	            return code

	    # Unrecognised label: report it and fall back to "unknown"
	    print("error: ")
	    return 0

	# Record one annotation (in memory and in the file uploaded by CommitScheduler)
	def save_annotation(
	    dataset_name: str,
	    dataset_id: int,
	    who: str,
	    unknown_quality: bool,
	    good: bool,
	    bad: bool,
	    is_proofreading_1: bool,
	    answer_text_1: str,
	    is_proofreading_2: bool,
	    answer_text_2: str,
	    score: int,
	    helpfulness: int,
	    correctness: int,
	    coherence: int,
	    complexity: int,
	    verbosity: int,
	    humor: int,
	    creativity: int,
	    appropriate: int,
	    following_instructions: int,
	    politeness: int,
	    harmfulness: int,
	    hate: int,
	    sexual: int,
	    violence: int,
	    suicide: int,
	    threat: int,
	    gun: int,
	    controlled_substance: int,
	    criminal_planing: int,
	    privacy: int,
	    harassment: int,
	    profanity: int,
	    political_content: int,
	    moral_judgement: int,
	    typos: int,
	) -> None:
	    """Append one annotation to ``annotated_dataset`` and to ``annotation_file``.

	    The same set of fields is written to both places. The in-memory row
	    keeps the values as passed in; the JSON line additionally carries a
	    ``datetime`` stamp and casts ``dataset_id`` and every rating to ``int``.

	    The file is appended to while holding ``scheduler.lock``; one JSON
	    object is written per line.
	    """
	    # Single source of truth for the record, so the DataFrame row and the
	    # JSON line cannot drift apart (the row used to miss 'coherence' and
	    # stored typos under the misspelled key 'types').
	    record = {
	        "dataset_name": dataset_name,
	        "dataset_id": dataset_id,
	        "who": who,
	        "unknown_quality": unknown_quality,
	        "good": good,
	        "bad": bad,
	        "is_proofreading_1": is_proofreading_1,
	        "answer_text_1": answer_text_1,
	        "is_proofreading_2": is_proofreading_2,
	        "answer_text_2": answer_text_2,
	        "score": score,
	        "helpfulness": helpfulness,
	        "correctness": correctness,
	        "coherence": coherence,
	        "complexity": complexity,
	        "verbosity": verbosity,
	        "humor": humor,
	        "creativity": creativity,
	        "appropriate": appropriate,
	        "following_instructions": following_instructions,
	        "politeness": politeness,
	        "harmfulness": harmfulness,
	        "hate": hate,
	        "sexual": sexual,
	        "violence": violence,
	        "suicide": suicide,
	        "threat": threat,
	        "gun": gun,
	        "controlled_substance": controlled_substance,
	        "criminal_planing": criminal_planing,
	        "privacy": privacy,
	        "harassment": harassment,
	        "profanity": profanity,
	        "political_content": political_content,
	        "moral_judgement": moral_judgement,
	        "typos": typos,
	    }

	    new_row = pd.DataFrame({key: [value] for key, value in record.items()})
	    annotated_dataset.value = pd.concat(
	        [annotated_dataset.value, new_row],
	        ignore_index=True,
	    )

	    # Fields written to JSON unchanged; everything else is cast to int so
	    # numpy integers coming from the DataFrame/sliders serialize cleanly.
	    written_as_is = {
	        "dataset_name",
	        "who",
	        "unknown_quality",
	        "good",
	        "bad",
	        "is_proofreading_1",
	        "answer_text_1",
	        "is_proofreading_2",
	        "answer_text_2",
	    }
	    # "id" is not written: it is unclear whether the last id can be fetched
	    # through CommitScheduler in order to append the next one.
	    data_to_write = {"datetime": datetime.datetime.now().isoformat()}
	    for key, value in record.items():
	        data_to_write[key] = value if key in written_as_is else int(value)

	    # Write one JSON line while holding the scheduler lock
	    with scheduler.lock:
	        with annotation_file.open("a", encoding="utf-8") as f:
	            f.write(json.dumps(data_to_write, ensure_ascii=False))
	            f.write("\n")

	# アノテーションの追加処理　========================================


	# UI処理 ========================================

	# Show the user name
	def hello(profile: gr.OAuthProfile | None) -> Tuple[str, str]:
	    """Build the greeting for the logged-in user and remember their name.

	    Args:
	        profile: OAuth profile of the visitor, or ``None`` when not logged in.
	            NOTE(review): presumably supplied by Gradio based on this type
	            hint; the ``demo.load(hello, ...)`` wiring is commented out in
	            this file -- confirm before re-enabling.

	    Returns:
	        ``(message, user_name)``. When ``profile`` is ``None`` the message
	        asks the user to log in and the stored ``who`` value is returned
	        unchanged; otherwise ``who`` is set to ``profile.username`` first.
	    """
	    if profile is None:
	        return "プライベートデータセット取得のためにログインしてください。", who.value
	    who.value = profile.username
	    return f'{profile.username} さん、よろしくお願いいたします。', who.value


	# Theme used for the Blocks UI
	theme_ = gr.themes.Default()

	# Stylesheet kept in a separate file so the design can be changed later
	def load_css():
	    """Return the text of ``style.css`` from the current working directory.

	    The file is decoded as UTF-8 explicitly so the result does not depend
	    on the locale of the machine running the app.

	    Raises:
	        OSError: if ``style.css`` cannot be opened.
	    """
	    with open("style.css", "r", encoding="utf-8") as file:
	        return file.read()

	# Gradio screen ============================================
	# The stylesheet is read once, when the Blocks object is constructed.
	with gr.Blocks(theme=theme_, css=load_css()) as demo:

	# Page title
	gr.Markdown("# データセットアノテーション for Tanuki (Phase2)")

	# Header row: usage notice plus the free-text annotator name field
	with gr.Row():
	gr.Markdown("### GENIACにて開発中のLLM用データセットに対してアノテーションするSpaceです\n \
	入力されたデータは使用される可能性があるため、個人情報・秘匿情報などは入力しないでください。\n \
	テスト中です。")
	gr_who = gr.TextArea(value=who.value, lines=1, max_lines=1, label="ユーザー名 (入力してください)")

	# Store the typed annotator name in the `who` state.
	def change_name(name: str):
	who.value = name

	# Run change_name whenever the name field changes; it updates no UI component.
	gr_who.change(
	change_name,
	inputs=[gr_who],
	outputs=[]
	)

	# Single-turn annotation tab
	with gr.Tab("アノテーション (シングルターン)"):

	# Hugging Face login feature (currently disabled)
	# with gr.Row(equal_height=True):
	# gr.LoginButton(value="HuggingFace ログイン",
	# logout_value="HuggingFace ログアウト", scale=1)
	# User name
	# gr_profile_name = gr.Markdown()
	# demo.load(hello, inputs=None, outputs=[gr_profile_name, who])

	with gr.Row():

	# Remember the dataset name picked in the dropdown.
	def dropdown_select(select_value) -> None:
	select_dropdown_dataset.value = select_value

	# Step 1: choose the target dataset
	gr_dropdown_dataset = gr.Dropdown(
	label="データセット選択 ①",
	choices=dropdown_dataset_list.value,
	value=select_dropdown_dataset.value,
	elem_id="dataset_sel",
	scale=2)

	gr_dropdown_dataset.change(
	dropdown_select,
	inputs=[gr_dropdown_dataset]
	)

	# Step 2: load the chosen dataset (the click handler is registered further down)
	gr_data_load_btn = gr.Button("② データセットを読み込む")

	# Step 3: the three evaluation tabs. Every widget starts with
	# interactive=False; the load-button handler returns updates that enable them.
	with gr.Column() as content_column:

	# Tab 1: simple good / don't know / bad rating
	with gr.Tab("③ シンプル(良・悪)"):
	with gr.Column():
	with gr.Row(equal_height=True):
	gr_good_btn = gr.Button("良い", interactive=False)
	gr_unknown_btn = gr.Button(
	"分からない", interactive=False) # added the "分からない" (don't know) button
	gr_bad_btn = gr.Button("悪い", interactive=False)

	gr_question_text_1_1 = gr.Textbox(
	label="質問: ", lines=5, interactive=False)

	gr_answer_text_1_1 = gr.Textbox(
	label="回答: 訂正して頂けると、品質が上がります。",
	lines=20,
	interactive=False)

	# Tab 2: one-click 5-point rating
	with gr.Tab("③ ５段階評価(シンプル)"):

	with gr.Row() as simple_score_btn:
	gr_score_5_btn = gr.Button("5: 高品質", interactive=False)
	gr_score_4_btn = gr.Button("4: 良い", interactive=False)
	gr_score_3_btn = gr.Button("3: 普通", interactive=False)
	gr_score_2_btn = gr.Button("2: 悪い", interactive=False)
	gr_score_1_btn = gr.Button("1: 低品質", interactive=False)

	gr_question_text_2_1 = gr.Textbox(
	label="質問: ", lines=5, interactive=False)

	gr_answer_text_2_1 = gr.Textbox(
	label="回答: 訂正して頂けると、品質が上がります。", lines=20, interactive=False)

	# Tab 3: detailed rating (overall score, per-aspect sliders, presence flags)
	with gr.Tab("③ ５段階評価 (詳細)"):

	with gr.Row():

	# Left column: question and editable answer
	with gr.Column() as EvalFive:

	gr_question_text_3_1 = gr.Textbox(
	label="質問: ", lines=10, interactive=False)

	# gr_explain = gr.Markdown("回答")

	gr_answer_text_3_1 = gr.Textbox(
	label="回答: 訂正して頂けると、品質が上がります。", lines=35, interactive=False)

	# Right column: submit/reset buttons and the rating widgets
	with gr.Column() as EvalFiveDetail:

	with gr.Row():
	gr_submit_score = gr.Button("評価送信", interactive=False)
	gr_score_reset = gr.Button("スコアリセット", interactive=False)

	# Overall score is the only slider whose range starts at 1 (default 3)
	gr_score_detail = gr.Slider(label="総合スコア　【必須】",value=3, minimum=1, maximum=5, step=1, interactive=False)
	gr_eval_annotation_explain = gr.Markdown("詳細アノテーション (5点満点)")

	# Per-aspect sliders: range 0-5, default 0
	with gr.Row() :
	gr_helpfulness = gr.Slider(label="有用性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_correctness = gr.Slider(label="正確さ", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_coherence = gr.Slider(label="一貫性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_complexity = gr.Slider(label="複雑さ", minimum=0, value=0, maximum=5, step=1, interactive=False)

	with gr.Row() :
	gr_verbosity = gr.Slider(label="冗長性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_humor = gr.Slider(label="ユーモア", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_creativity = gr.Slider(label="創造性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_appropriate = gr.Slider(label="適切性", minimum=0, value=0, maximum=5, step=1, interactive=False)

	with gr.Row() :
	gr_following_instructions = gr.Slider(label="忠実性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_politeness = gr.Slider(label="礼儀正しさ", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_harmfulness = gr.Slider(label="有害性", minimum=0, value=0, maximum=5, step=1, interactive=False)
	gr_text_4 = gr.Markdown("判断可能な物のみ\n\nスコアを付けてください。")

	gr_text_5 = gr.Markdown("")

	# Presence/absence radios: choices are 不明 / 有 / 無, default 不明
	with gr.Row():
	gr_typos = gr.Radio(label="誤字・脱字", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_moral_judgement = gr.Radio(label="非道徳", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_hate = gr.Radio(label="ヘイト", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_sexual = gr.Radio(label="性的内容", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_violence = gr.Radio(label="暴力的", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_suicide = gr.Radio(label="自殺行為", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_threat = gr.Radio(label="犯罪", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_gun = gr.Radio(label="銃等", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_controlled_substance = gr.Radio(label="規制対象物質", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_criminal_planing = gr.Radio(label="犯罪計画", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_privacy = gr.Radio(label="個人情報", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_harassment = gr.Radio(label="ハラスメント", choices=["不明", "有", "無"], value="不明", interactive=False)

	with gr.Row():
	gr_profanity = gr.Radio(label="冒涜行為", choices=["不明", "有", "無"], value="不明", interactive=False)
	gr_political_content = gr.Radio(label="政治的内容", choices=["不明", "有", "無"], value="不明", interactive=False)

	# Submit handler of the detailed 5-point evaluation tab
	def eval_submit(
	    # text
	    g_answer_text_3_1: str,

	    # sliders
	    g_score: int,
	    g_helpfulness: int,
	    g_correctness: int,
	    g_coherence: int,
	    g_complexity: int,
	    g_verbosity: int,
	    g_humor: int,
	    g_creativity: int,
	    g_appropriate: int,
	    g_following_instructions: int,
	    g_politeness: int,
	    g_harmfulness: int,

	    # radio buttons
	    g_hate: str,
	    g_sexual: str,
	    g_violence: str,
	    g_suicide: str,
	    g_threat: str,
	    g_gun: str,
	    g_controlled_substance: str,
	    g_criminal_planing: str,
	    g_privacy: str,
	    g_harassment: str,
	    g_profanity: str,
	    g_political_content: str,
	    g_moral_judgement: str,
	    g_typos: str,
	):
	    """Store the detailed rating for the current row and move to the next row.

	    The widget values are copied into the module-level states, the
	    annotation is saved for the row that was on screen, the row index is
	    advanced (wrapping to 0 at the end of the dataset), and the rating
	    widgets are reset.

	    Returns:
	        A 32-tuple: six ``gr.update`` values carrying the next row's
	        question/answer (one pair per tab) followed by the 26 reset values
	        from ``score_reset_display()``.
	    """
	    # The simple good/bad flags do not apply to a detailed submission
	    good.value = False
	    bad.value = False
	    unknown_quality.value = False

	    # Keep the answer text only when the annotator actually edited it
	    if initial_answer_text_1.value != g_answer_text_3_1:
	        is_proofreading_1.value = True
	        answer_text_1.value = g_answer_text_3_1
	    else:
	        is_proofreading_1.value = False
	        answer_text_1.value = ""

	    # Copy the slider values
	    score.value = g_score
	    helpfulness.value = g_helpfulness
	    correctness.value = g_correctness
	    coherence.value = g_coherence
	    complexity.value = g_complexity
	    verbosity.value = g_verbosity
	    humor.value = g_humor
	    creativity.value = g_creativity
	    appropriate.value = g_appropriate
	    following_instructions.value = g_following_instructions
	    politeness.value = g_politeness
	    harmfulness.value = g_harmfulness

	    # Copy the radio values as integer codes
	    hate.value = checkbox_to_int(g_hate)
	    sexual.value = checkbox_to_int(g_sexual)
	    violence.value = checkbox_to_int(g_violence)
	    suicide.value = checkbox_to_int(g_suicide)
	    threat.value = checkbox_to_int(g_threat)
	    gun.value = checkbox_to_int(g_gun)
	    controlled_substance.value = checkbox_to_int(g_controlled_substance)
	    criminal_planing.value = checkbox_to_int(g_criminal_planing)
	    privacy.value = checkbox_to_int(g_privacy)
	    harassment.value = checkbox_to_int(g_harassment)
	    profanity.value = checkbox_to_int(g_profanity)
	    political_content.value = checkbox_to_int(g_political_content)
	    moral_judgement.value = checkbox_to_int(g_moral_judgement)
	    typos.value = checkbox_to_int(g_typos)

	    df = select_dataset.value["train"]

	    # Save BEFORE advancing: select_idx still points at the row that was
	    # just rated, so the stored dataset_id is that row's original index
	    # rather than the next row's.
	    save_annotation(
	        select_dropdown_dataset.value,
	        df.iloc[select_idx.value]['index'],
	        who.value,
	        unknown_quality.value,
	        good.value,
	        bad.value,
	        is_proofreading_1.value,
	        answer_text_1.value,
	        is_proofreading_2.value,
	        answer_text_2.value,
	        score.value,
	        helpfulness.value,
	        correctness.value,
	        coherence.value,
	        complexity.value,
	        verbosity.value,
	        humor.value,
	        creativity.value,
	        appropriate.value,
	        following_instructions.value,
	        politeness.value,
	        harmfulness.value,
	        hate.value,
	        sexual.value,
	        violence.value,
	        suicide.value,
	        threat.value,
	        gun.value,
	        controlled_substance.value,
	        criminal_planing.value,
	        privacy.value,
	        harassment.value,
	        profanity.value,
	        political_content.value,
	        moral_judgement.value,
	        typos.value,
	    )

	    # Advance to the next row, wrapping around at the end of the dataset
	    select_idx.value += 1
	    if select_idx.value >= len(df):
	        select_idx.value = 0

	    # Prepare edit detection for the row that is about to be shown
	    initialize_next_data(df)

	    # Values that refresh the UI: next question/answer on all three tabs,
	    # then the reset rating widgets (this call also resets the states)
	    next_row = df.iloc[select_idx.value]
	    return (
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        *score_reset_display(),
	    )

	# Register eval_submit on the submit button of the detailed tab.
	# inputs: the detailed tab's answer text, then sliders, then radios
	#         (same order as eval_submit's parameters).
	# outputs: question/answer boxes of all three tabs, then the rating widgets
	#          in the order returned by score_reset_display().
	gr_submit_score.click(
	    eval_submit,
	    inputs=[
	        gr_answer_text_3_1,
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ],
	    outputs=[
	        gr_question_text_1_1, gr_answer_text_1_1,
	        gr_question_text_2_1, gr_answer_text_2_1,
	        gr_question_text_3_1, gr_answer_text_3_1,
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ],
	)

	# Click handler for a 5-point score button
	def score_button_clicked(button_value):
	    """Clear the good/bad/unknown flags and store ``button_value`` as the score."""
	    for flag in (good, bad, unknown_quality):
	        flag.value = False
	    score.value = button_value

	# Shared handler for the simple (good/bad/unknown) and one-click score buttons
	def update_annotation(
	    input_ans_1: str = None,
	    input_ans_2: str = None,
	    is_good: bool = None,  # good/bad flag
	    is_unknown: bool = None,  # "don't know" flag
	    is_simple: bool = None,
	    score_value: int = None  # 5-point value; None for good/bad/unknown
	) -> tuple:
	    """Save the rating for the current row and move on to the next row.

	    Args:
	        input_ans_1: Answer text from the first textbox, or ``None``.
	        input_ans_2: Answer text from the second textbox, or ``None``.
	        is_good: ``True`` for "good", ``False`` for "bad"/"unknown".
	        is_unknown: ``True`` when the annotator chose "don't know".
	        is_simple: When truthy the stored score is forced to 0.
	        score_value: 5-point score, or ``None`` for the simple buttons.

	    Returns:
	        A 32-tuple: six ``gr.update`` values carrying the next row's
	        question/answer (one pair per tab) followed by the 26 reset values
	        from ``score_reset_display()``.
	    """
	    # Update the good/bad/unknown flags and the score
	    update_evaluation_state(is_good, is_unknown, score_value)

	    # Detect edits of the answer text (also applies to the 5-point buttons)
	    update_answer_state(input_ans_1, input_ans_2)

	    # A click in simple mode stores score 0
	    if is_simple:
	        score.value = 0

	    df = select_dataset.value["train"]

	    # Save BEFORE advancing: select_idx still points at the row that was
	    # just rated, so the original index recorded as dataset_id belongs to
	    # that row rather than to the next one.
	    save_annotation_data(df)

	    # Advance to the next row, wrapping around at the end of the dataset
	    select_idx.value += 1
	    if select_idx.value >= len(df):
	        select_idx.value = 0

	    # Prepare edit detection for the row that is about to be shown
	    initialize_next_data(df)

	    # Reset the rating states and get the matching widget values
	    reset_values = score_reset_display()

	    next_row = df.iloc[select_idx.value]
	    return (
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        gr.update(value=next_row["question"]),
	        gr.update(value=next_row["answer"]),
	        *reset_values,
	    )


	# Set the good/bad/unknown flags (and the score) for the clicked button
	def update_evaluation_state(is_good, is_unknown, score_value):
	    """Update the evaluation states for either rating mode.

	    With ``score_value`` set (5-point rating) all three flags are cleared
	    and the score is stored. Otherwise the flags are taken from
	    ``is_good`` / ``is_unknown`` and the score is left untouched.
	    """
	    if score_value is None:
	        # good / bad / unknown rating
	        good.value = is_good
	        if is_unknown:
	            bad.value = False
	        else:
	            bad.value = not is_good
	        unknown_quality.value = is_unknown  # record the "don't know" choice
	        return

	    # 5-point rating
	    good.value = False
	    bad.value = False
	    unknown_quality.value = False
	    score.value = score_value

	# Detect answer edits and store the edited text
	def update_answer_state(input_ans_1, input_ans_2):
	    """Record whether each answer was edited and, if so, its new text.

	    An answer counts as edited when a text was supplied and it differs
	    from the initial text remembered for the current row. Both the flag
	    and the text are written on every call, so a flag left over from an
	    earlier call cannot survive an unedited answer (``eval_submit`` clears
	    its flag the same way).
	    """
	    edited_1 = input_ans_1 is not None and initial_answer_text_1.value != input_ans_1
	    is_proofreading_1.value = edited_1
	    answer_text_1.value = input_ans_1 if edited_1 else ""

	    # Second turn
	    edited_2 = input_ans_2 is not None and initial_answer_text_2.value != input_ans_2
	    is_proofreading_2.value = edited_2
	    answer_text_2.value = input_ans_2 if edited_2 else ""

	# Save the current states as one annotation
	def save_annotation_data(df):
	    """Call ``save_annotation`` with the current value of every rating state.

	    The dataset id passed on is the ``index`` column of the row at
	    ``select_idx`` in ``df``, i.e. the row's original position before
	    shuffling.
	    """
	    # Listed in save_annotation's parameter order (after dataset_name,
	    # dataset_id and who)
	    rating_states = (
	        unknown_quality, good, bad,
	        is_proofreading_1, answer_text_1,
	        is_proofreading_2, answer_text_2,
	        score, helpfulness, correctness, coherence, complexity,
	        verbosity, humor, creativity, appropriate,
	        following_instructions, politeness, harmfulness,
	        hate, sexual, violence, suicide, threat, gun,
	        controlled_substance, criminal_planing, privacy, harassment,
	        profanity, political_content, moral_judgement, typos,
	    )
	    save_annotation(
	        select_dropdown_dataset.value,
	        df.iloc[select_idx.value]['index'],
	        who.value,
	        *(state.value for state in rating_states),
	    )

	# Reset edit tracking for the row that is about to be shown
	def initialize_next_data(df):
	    """Clear both proofreading flags and remember the new row's answer.

	    The answer of the row at ``select_idx`` becomes the baseline that later
	    edits are compared against.
	    """
	    is_proofreading_1.value = False
	    is_proofreading_2.value = False

	    baseline_answer = df.iloc[select_idx.value]["answer"]
	    initial_answer_text_1.value = baseline_answer
	    initial_answer_text_2.value = baseline_answer

	# "Cannot judge" button
	def unknown_click(input_ans_1, input_ans_2):
	    """Record the current row as "don't know" and return the UI updates."""
	    for flag in (good, bad):
	        flag.value = False
	    score_reset_display()
	    return update_annotation(
	        input_ans_1=input_ans_1,
	        input_ans_2=input_ans_2,
	        is_good=False,
	        is_unknown=True,
	        is_simple=True,
	    )

	# Click event of the "cannot judge" button: reads both answer boxes and
	# refreshes the question/answer boxes of all tabs plus the rating widgets.
	gr_unknown_btn.click(
	    unknown_click,
	    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
	    outputs=[
	        gr_question_text_1_1, gr_answer_text_1_1,
	        gr_question_text_2_1, gr_answer_text_2_1,
	        gr_question_text_3_1, gr_answer_text_3_1,
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ]
	)

	# "Good" button
	def good_click(input_ans_1, input_ans_2):
	    """Record the current row as "good" and return the UI updates."""
	    score_reset_display()
	    rating = dict(is_good=True, is_unknown=False, is_simple=True)
	    return update_annotation(
	        input_ans_1=input_ans_1,
	        input_ans_2=input_ans_2,
	        **rating,
	    )

	# Click event of the "good" button: reads both answer boxes and refreshes
	# the question/answer boxes of all tabs plus the rating widgets.
	gr_good_btn.click(
	    good_click,
	    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
	    outputs=[
	        gr_question_text_1_1, gr_answer_text_1_1,
	        gr_question_text_2_1, gr_answer_text_2_1,
	        gr_question_text_3_1, gr_answer_text_3_1,
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ]
	)

	# "Bad" button
	def bad_click(input_ans_1, input_ans_2):
	    """Record the current row as "bad" and return the UI updates."""
	    score_reset_display()
	    rating = dict(is_good=False, is_unknown=False, is_simple=True)
	    return update_annotation(
	        input_ans_1=input_ans_1,
	        input_ans_2=input_ans_2,
	        **rating,
	    )

	# Click event of the "bad" button: reads both answer boxes and refreshes
	# the question/answer boxes of all tabs plus the rating widgets.
	gr_bad_btn.click(
	    bad_click,
	    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
	    outputs=[
	        gr_question_text_1_1, gr_answer_text_1_1,
	        gr_question_text_2_1, gr_answer_text_2_1,
	        gr_question_text_3_1, gr_answer_text_3_1,
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ]
	)

	# Click events of the one-click 5-point score buttons.
	# Every button reads the answer box of the simple 5-point tab and refreshes
	# the question/answer boxes of all tabs plus the rating widgets.
	def _score_click_handler(value):
	    """Return the click handler that submits ``value`` as the score."""
	    return lambda x: update_annotation(input_ans_1=x, is_unknown=False, score_value=value)

	_score_click_outputs = [
	    gr_question_text_1_1, gr_answer_text_1_1,
	    gr_question_text_2_1, gr_answer_text_2_1,
	    gr_question_text_3_1, gr_answer_text_3_1,
	    gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	    gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	    gr_appropriate, gr_following_instructions, gr_politeness,
	    gr_harmfulness,
	    gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	    gr_controlled_substance, gr_criminal_planing, gr_privacy,
	    gr_harassment, gr_profanity, gr_political_content,
	    gr_moral_judgement, gr_typos,
	]

	# Registered in ascending score order, one event per button
	for _score_btn, _score_value in (
	    (gr_score_1_btn, 1),
	    (gr_score_2_btn, 2),
	    (gr_score_3_btn, 3),
	    (gr_score_4_btn, 4),
	    (gr_score_5_btn, 5),
	):
	    _score_btn.click(
	        _score_click_handler(_score_value),
	        inputs=[gr_answer_text_2_1],
	        outputs=list(_score_click_outputs),
	    )

	# Reset every rating
	def score_reset_display():
	    """Reset the rating states and return the matching widget defaults.

	    The overall score goes back to 3 and every other rating state to 0.

	    Returns:
	        A 26-tuple for the rating widgets: ``3`` for the overall slider,
	        eleven ``0`` values for the aspect sliders and fourteen ``"不明"``
	        labels for the presence/absence radios.
	    """
	    # gr.State values
	    score.value = 3
	    zeroed_states = (
	        helpfulness, correctness, coherence, complexity, verbosity,
	        humor, creativity, appropriate, following_instructions,
	        politeness, harmfulness,
	        hate, sexual, violence, suicide, threat, gun,
	        controlled_substance, criminal_planing, privacy, harassment,
	        profanity, political_content, moral_judgement, typos,
	    )
	    for state in zeroed_states:
	        state.value = 0

	    # Widget values
	    return (3, *([0] * 11), *(["不明"] * 14))

	# Click event of the score reset button: no inputs; the outputs are the
	# rating widgets in the order returned by score_reset_display().
	gr_score_reset.click(
	    score_reset_display,
	    inputs=[],
	    outputs=[
	        gr_score_detail, gr_helpfulness, gr_correctness, gr_coherence,
	        gr_complexity, gr_verbosity, gr_humor, gr_creativity,
	        gr_appropriate, gr_following_instructions, gr_politeness,
	        gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ]
	)

	# Click event of the dataset load button.
	# The first six outputs receive the question/answer text; the remaining 39
	# receive interactive=True updates (the answer boxes are listed a second
	# time for that purpose).
	gr_data_load_btn.click(
	    dataset_load_fn,
	    inputs=None,
	    outputs=[
	        # text values
	        gr_question_text_1_1, gr_answer_text_1_1,
	        gr_question_text_2_1, gr_answer_text_2_1,
	        gr_question_text_3_1, gr_answer_text_3_1,
	        # widgets to enable
	        gr_answer_text_1_1, gr_answer_text_2_1, gr_answer_text_3_1,
	        gr_unknown_btn, gr_good_btn, gr_bad_btn,
	        gr_score_5_btn, gr_score_4_btn, gr_score_3_btn,
	        gr_score_2_btn, gr_score_1_btn,
	        gr_submit_score, gr_score_detail, gr_score_reset,
	        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
	        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
	        gr_following_instructions, gr_politeness, gr_harmfulness,
	        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
	        gr_controlled_substance, gr_criminal_planing, gr_privacy,
	        gr_harassment, gr_profanity, gr_political_content,
	        gr_moral_judgement, gr_typos,
	    ]
	)

	# TODO: show the list of already-annotated items when the tab is switched
	# with gr.Tab("アノテ済みデータセット(管理画面)"):
	# refresh the displayed data when the tab is switched

	# Launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	demo.launch()