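# Hugging Face page residue (not part of the program): avatar caption "kevineen's picture"
# Commit message: "typo 修正" (typo fix)
# Commit: 19ee3e1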
import json
import datetime
from pathlib import Path
import uuid
from typing import Tuple
import pandas as pd
import gradio as gr
from datasets import load_dataset
from huggingface_hub import CommitScheduler
# Hugging Face dataset repositories that annotations are uploaded to.
# (Kept as a list so the target can be switched while testing.)
OUTPUT_DATASET = [
"team-hatakeyama-phase2/annotation_tanuki_phase2",
"kevineen/Tanuki-Phase2-annotation-dataset", # output test
]
# Datasets that can be chosen for annotation (shown in the dropdown).
ANNOTATION_DATASET = [
"hatakeyama-llm-team/AutoGeneratedJapaneseQA",
"hatakeyama-llm-team/AutoGeneratedJapaneseQA-other",
"kanhatakeyama/ChatbotArenaJaMixtral8x22b",
"kanhatakeyama/OrcaJaMixtral8x22b",
"kanhatakeyama/LogicalDatasetsByMixtral8x22b",
# Data format not supported yet (support planned)
# "hatakeyama-llm-team/WikiBookJa",
# "kanhatakeyama/AutoWikiQA",
# "susumuota/SyntheticTextWikiTranslate-askllm-v1", # Ask-LLM translation
# Nemotron data (for future use)
]
# Multi-turn datasets; not referenced by the UI code visible in this file.
MULTI_TURN_ANNOTATION_DATASET = [
# Multi-turn: not supported yet
"kanhatakeyama/AutoMultiTurnByMixtral8x22b",
]
# Session State : (variables intended to be managed per browser session) ===========================
# NOTE(review): the handlers in this file read and write `.value` on these
# module-level objects directly; whether that is actually isolated per browser
# session should be confirmed against the Gradio State documentation.
# States for enabling/disabling the UI
is_selected_dataset = gr.State(False)
is_loaded_dataset = gr.State(False)
# List of datasets offered in the dropdown
dropdown_dataset_list = gr.State(value=ANNOTATION_DATASET)
# Currently selected dataset name; initial value is "hatakeyama-llm-team/AutoGeneratedJapaneseQA",
select_dropdown_dataset = gr.State(dropdown_dataset_list.value[0])
select_dataset = gr.State(None) # currently loaded dataset
select_dataset_total_len = gr.State(0) # length of the current dataset
select_idx = gr.State(0) # current row index (unnecessary if a random mode is adopted?)
# random_mode = gr.State(False) # random mode
# Annotations produced by the annotator (one row per saved annotation)
annotated_dataset = gr.State(
pd.DataFrame({
'dataset_name': [], # source dataset
'dataset_id': [], # dataset index
'who': [], # annotator name
'unknown_quality': [], # quality unknown / cannot judge
'good': [], # good
'bad': [], # bad
'is_proofreading_1': [], # whether the text was corrected
'answer_text_1': [],
'is_proofreading_2': [], # for the 2nd turn
'answer_text_2': [], # for the 2nd turn
# 5-level ratings
'score': [], # overall (5-level rating)
'helpfulness': [], # helpfulness (5-level rating)
'correctness': [], # correctness (5-level rating)
'coherence': [], # coherence (5-level rating)
'complexity': [], # complexity (5-level rating)
'verbosity': [], # verbosity (5-level rating)
'humor': [], # humor (SteerLM)
'creativity': [], # creativity (SteerLM)
'appropriate': [], # appropriateness (inappropriateness in SteerLM)
'following_instructions': [], # whether instructions, if any, are followed (corresponds to SteerLM's fails_task)
'politeness': [], # politeness (minnadechat)
'harmfulness': [], # harmfulness (minnadechat)
# 0: unknown (unset) 1: absent 2: present
# NOTE(review): checkbox_to_int maps "有" (present) to 1 and "無" (absent) to 2,
# the opposite of the line above — confirm which encoding is intended.
'typos': [], # typos / omissions (minnadechat)
'hate': [], # hate
'sexual': [], # sexual content
'violence': [], # violence
'suicide': [], # suicide
'threat': [], # crime
'gun': [], # heavy firearms
'controlled_substance': [], # controlled substances
'criminal_planing': [], # criminal planning
'privacy': [], # personal information
'harassment': [], # harassment
'profanity': [], # profanity
'political_content': [], # political content (SteerLM)
'moral_judgement': [], # immoral content (SteerLM)
})
)
is_dataset_loaded = gr.State(False) # dataset load status
you_dataset_id = gr.State(0) # ID of the data the annotator is annotating
dataset_name = gr.State("") # name of the dataset used for editing
dataset_id = gr.State(0) # index in the source dataset
who = gr.State("") # annotator name
# Simple mode
unknown_quality = gr.State(False) # unknown (unset)
good = gr.State(False) # good
bad = gr.State(False) # bad
initial_answer_text_1 = gr.State("") # original answer 1, used to detect edits
initial_answer_text_2 = gr.State("") # original answer 2, used to detect edits
is_proofreading_1 = gr.State(False) # whether answer 1 was edited
answer_text_1 = gr.State("") # answer_1 text
is_proofreading_2 = gr.State(False) # whether answer 2 was edited
answer_text_2 = gr.State("") # answer_2 text
# (5-level ratings)
# 0 is used as the value for "not judged" or "hard to judge"
score = gr.State(3) # overall score, initial value 3 (the minimum required input)
helpfulness = gr.State(0) # helpfulness
correctness = gr.State(0) # correctness
coherence = gr.State(0) # coherence
complexity = gr.State(0) # complexity
verbosity = gr.State(0) # verbosity
humor = gr.State(0) # humor (SteerLM)
creativity = gr.State(0) # creativity (SteerLM)
appropriate = gr.State(0) # appropriateness (not_appropriate in SteerLM)
following_instructions = gr.State(0) # fidelity to instructions (SteerLM's fails_task)
politeness = gr.State(0) # politeness (minnadechat)
harmfulness = gr.State(0) # harmfulness (minnadechat)
# (presence/absence ratings)
# 0 is used as the value for "not judged" or "hard to judge"
hate = gr.State(0) # hate
sexual = gr.State(0) # sexual content
violence = gr.State(0) # violence
suicide = gr.State(0) # suicide
threat = gr.State(0) # crime
gun = gr.State(0) # guns / heavy firearms
controlled_substance = gr.State(0) # controlled substances
criminal_planing = gr.State(0) # criminal planning
privacy = gr.State(0) # personal / private information
harassment = gr.State(0) # harassment
profanity = gr.State(0) # profanity
political_content = gr.State(0) # political content (SteerLM)
moral_judgement = gr.State(0) # immoral content (SteerLM)
typos = gr.State(0) # typos / omissions (minnadechat)
# Unorganized ========================================
# Dataset loading
def dataset_load_fn() -> Tuple[
    str, str, str, str, str, str,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update, gr.update, gr.update, gr.update,
    gr.update, gr.update, gr.update,
]:
    """Load the selected dataset, shuffle it and return the first row for display.

    The dataset named by ``select_dropdown_dataset`` is loaded, its "train"
    split is converted to a DataFrame, the original row number is kept in an
    ``index`` column, and the rows are shuffled. The shuffled frame is stored
    back under the "train" key of ``select_dataset``.

    Returns:
        A 45-element tuple: the first row's question/answer pair repeated
        three times, followed by 39 ``gr.update(interactive=True)`` values.
    """
    is_dataset_loaded.value = False  # reset the load flag before loading
    loaded = load_dataset(select_dropdown_dataset.value)
    select_dataset.value = loaded

    # Dataset object -> pandas DataFrame
    frame = loaded["train"].to_pandas()

    # Keep the original position as an "index" column, then shuffle the rows.
    frame = frame.reset_index(drop=False)
    frame = frame.sample(frac=1).reset_index(drop=True)
    loaded["train"] = frame  # store the shuffled DataFrame

    select_idx.value = 0
    select_dataset_total_len.value = len(frame)
    is_dataset_loaded.value = True  # loading finished

    # Remember the unedited answer so later edits can be detected.
    first_row = frame.iloc[select_idx.value]
    answer = first_row["answer"]
    initial_answer_text_1.value = answer
    initial_answer_text_2.value = answer
    question = first_row["question"]

    texts = (question, answer) * 3
    enable_updates = tuple(gr.update(interactive=True) for _ in range(39))
    return texts + enable_updates
# Data saving ========================================
# On a Space, the destination is the CommitScheduler's path_in_repo folder.
# (In local development a json file is created in the ./user_annotation folder.)
# The file name gets a fresh UUID each time this module is executed.
annotation_file = Path("user_annotation/") / f"data_{uuid.uuid4()}.json"
annotated_folder = annotation_file.parent
scheduler = CommitScheduler(
repo_id=OUTPUT_DATASET[1], # index 1 is the entry commented as the output-test repository
repo_type="dataset",
folder_path=annotated_folder,
path_in_repo="data", # destination folder when running on a Space
private=True,
every=5, # upload every 5 minutes (per the original note: the minimum recommended in the Hugging Face documentation)
)
# Convert a radio/checkbox label to an int
def checkbox_to_int(checkbox_value) -> int:
    """Map a presence label to its integer code.

    "不明" (unknown) -> 0, "有" (present) -> 1, "無" (absent) -> 2.
    Any other value prints "error: " and yields 0.
    """
    label_codes = (("不明", 0), ("有", 1), ("無", 2))
    for label, code in label_codes:
        if checkbox_value == label:
            return code
    # Unrecognised label: report it and fall back to the "unknown" code.
    print("error: ")
    return 0
# CommitScheduler (upload of the data to Hugging Face)
def save_annotation(
    dataset_name: str,
    dataset_id: int,
    who: str,
    unknown_quality: bool,
    good: bool,
    bad: bool,
    is_proofreading_1: bool,
    answer_text_1: str,
    is_proofreading_2: bool,
    answer_text_2: str,
    score: int,
    helpfulness: int,
    correctness: int,
    coherence: int,
    complexity: int,
    verbosity: int,
    humor: int,
    creativity: int,
    appropriate: int,
    following_instructions: int,
    politeness: int,
    harmfulness: int,
    hate: int,
    sexual: int,
    violence: int,
    suicide: int,
    threat: int,
    gun: int,
    controlled_substance: int,
    criminal_planing: int,
    privacy: int,
    harassment: int,
    profanity: int,
    political_content: int,
    moral_judgement: int,
    typos: int,
) -> None:
    """Record one annotation in memory and append it to the JSON-lines file.

    The annotation is appended as a new row to ``annotated_dataset`` and then
    written as one JSON object per line to ``annotation_file`` while holding
    ``scheduler.lock``. The JSON object additionally carries a ``datetime``
    field with the current local time in ISO format.

    Fix over the previous version: the in-memory row now includes
    ``coherence`` and stores ``typos`` under the correct key (it used to be
    written under ``types``, adding a stray column and leaving NaN in the
    real ones).

    Raises:
        TypeError / ValueError: if a numeric field cannot be converted with
            ``int()`` for the JSON record.
    """
    # Single source of truth for both the DataFrame row and the JSON line;
    # insertion order is the order of the keys in the written JSON object.
    record = {
        'dataset_name': dataset_name,
        'dataset_id': dataset_id,
        'who': who,
        'unknown_quality': unknown_quality,
        'good': good,
        'bad': bad,
        'is_proofreading_1': is_proofreading_1,
        'answer_text_1': answer_text_1,
        'is_proofreading_2': is_proofreading_2,
        'answer_text_2': answer_text_2,
        'score': score,
        'helpfulness': helpfulness,
        'correctness': correctness,
        'coherence': coherence,
        'complexity': complexity,
        'verbosity': verbosity,
        'humor': humor,
        'creativity': creativity,
        'appropriate': appropriate,
        'following_instructions': following_instructions,
        'politeness': politeness,
        'harmfulness': harmfulness,
        'hate': hate,
        'sexual': sexual,
        'violence': violence,
        'suicide': suicide,
        'threat': threat,
        'gun': gun,
        'controlled_substance': controlled_substance,
        'criminal_planing': criminal_planing,
        'privacy': privacy,
        'harassment': harassment,
        'profanity': profanity,
        'political_content': political_content,
        'moral_judgement': moral_judgement,
        'typos': typos,
    }

    # ignore_index=True already renumbers the rows, so no extra reset_index.
    annotated_dataset.value = pd.concat(
        [
            annotated_dataset.value,
            pd.DataFrame({column: [value] for column, value in record.items()}),
        ],
        ignore_index=True,
    )

    # Fields written as-is; every other field is coerced with int() so that
    # values such as numpy integers become JSON-serialisable.
    passthrough_fields = {
        'dataset_name', 'who', 'unknown_quality', 'good', 'bad',
        'is_proofreading_1', 'answer_text_1',
        'is_proofreading_2', 'answer_text_2',
    }

    # Write
    with scheduler.lock:
        with annotation_file.open("a", encoding='utf-8') as f:
            # "id" is not written: it seems impossible to fetch the last id
            # and append the next one with CommitScheduler (original note).
            data_to_write = {
                "datetime": str(datetime.datetime.now().isoformat()),
            }
            for key, value in record.items():
                data_to_write[key] = (
                    value if key in passthrough_fields else int(value)
                )
            f.write(json.dumps(data_to_write, ensure_ascii=False))
            f.write("\n")
# Annotation add processing ========================================
# UI processing ========================================
# Show the user name
def hello(profile: gr.OAuthProfile | None) -> Tuple[str, str]:
    """Build the greeting for the logged-in user and record the user name.

    Returns:
        ``(message, who.value)``. Without a profile the message asks the user
        to log in and ``who`` is left untouched; otherwise ``who`` is set to
        the profile's username before it is returned.
    """
    if profile is not None:
        who.value = profile.username
        greeting = f'{profile.username} さん、よろしくお願いいたします。'
        return greeting, who.value
    return "プライベートデータセット取得のためにログインしてください。", who.value
# Theme state: the default Gradio theme, passed to gr.Blocks below.
theme_ = gr.themes.Default()
# For later CSS design changes
def load_css():
    """Return the contents of ``style.css`` from the current working directory.

    The file is decoded as UTF-8 explicitly, matching the encoding used for
    the annotation file elsewhere in this module, instead of relying on the
    platform's default encoding.

    Raises:
        FileNotFoundError: if ``style.css`` does not exist.
    """
    with open("style.css", "r", encoding="utf-8") as file:
        return file.read()
# Gradio screen ============================================
# Root of the UI; uses the theme above and the CSS returned by load_css().
with gr.Blocks(theme=theme_, css=load_css()) as demo:
gr.Markdown("# データセット アノテーション for Tanuki (Phase2)")
with gr.Row():
gr.Markdown("### GENIACにて開発中のLLM用データセットに対してアノテーションするSpaceです\n \
入力されたデータは使用される可能性があるため、個人情報・秘匿情報などは入力しないでください。\n \
テスト中です。")
# Single-line box for the annotator name; its initial text is the `who` state.
gr_who = gr.TextArea(value=who.value, lines=1, max_lines=1, label="ユーザー名 (入力してください)")
def change_name(name: str):
    """Store the text of the user-name box as the annotator name."""
    who.value = name


# Keep the `who` state in sync whenever the user-name box changes.
gr_who.change(fn=change_name, inputs=[gr_who], outputs=[])
# Single-turn annotation tab: dataset selection and load button.
with gr.Tab("アノテーション (シングルターン)"):
# HF login feature (currently disabled)
# with gr.Row(equal_height=True):
# gr.LoginButton(value="HuggingFace ログイン",
# logout_value="HuggingFace ログアウト", scale=1)
# User name
# gr_profile_name = gr.Markdown()
# demo.load(hello, inputs=None, outputs=[gr_profile_name, who])
with gr.Row():
def dropdown_select(select_value) -> None:
# Remember the dataset name chosen in the dropdown; dataset_load_fn reads it.
select_dropdown_dataset.value = select_value
# Selection of the target dataset
gr_dropdown_dataset = gr.Dropdown(
label="データセット選択 ①",
choices=dropdown_dataset_list.value,
value=select_dropdown_dataset.value,
elem_id="dataset_sel",
scale=2)
# No outputs: the handler only updates the state above.
gr_dropdown_dataset.change(
dropdown_select,
inputs=[gr_dropdown_dataset]
)
# Its click handler (dataset_load_fn) is registered near the end of the file.
gr_data_load_btn = gr.Button("② データセットを読み込む")
# Evaluation area. Every control below is created with interactive=False;
# the load button's outputs list includes them so dataset_load_fn can enable them.
with gr.Column() as content_column:
# Simple mode: good / unknown / bad buttons plus the question and answer text.
with gr.Tab("③ シンプル(良・悪)"):
with gr.Column():
with gr.Row(equal_height=True):
gr_good_btn = gr.Button("良い", interactive=False)
gr_unknown_btn = gr.Button(
"分からない", interactive=False) # added the "unknown" button
gr_bad_btn = gr.Button("悪い", interactive=False)
gr_question_text_1_1 = gr.Textbox(
label="質問: ", lines=5, interactive=False)
# The answer box is among the components enabled after loading, so it can be corrected.
gr_answer_text_1_1 = gr.Textbox(
label="回答: 訂正して頂けると、品質が上がります。",
lines=20,
interactive=False)
# Simple 5-level mode: one button per score plus the question and answer text.
with gr.Tab("③ 5段階評価(シンプル)"):
with gr.Row() as simple_score_btn:
gr_score_5_btn = gr.Button("5: 高品質", interactive=False)
gr_score_4_btn = gr.Button("4: 良い", interactive=False)
gr_score_3_btn = gr.Button("3: 普通", interactive=False)
gr_score_2_btn = gr.Button("2: 悪い", interactive=False)
gr_score_1_btn = gr.Button("1: 低品質", interactive=False)
gr_question_text_2_1 = gr.Textbox(
label="質問: ", lines=5, interactive=False)
# This tab's answer box is the input passed to the score-button handlers.
gr_answer_text_2_1 = gr.Textbox(
label="回答: 訂正して頂けると、品質が上がります。", lines=20, interactive=False)
# Detailed 5-level mode: text on one side, sliders and radios on the other.
with gr.Tab("③ 5段階評価 (詳細)"):
with gr.Row():
with gr.Column() as EvalFive:
gr_question_text_3_1 = gr.Textbox(
label="質問: ", lines=10, interactive=False)
# gr_explain = gr.Markdown("回答")
gr_answer_text_3_1 = gr.Textbox(
label="回答: 訂正して頂けると、品質が上がります。", lines=35, interactive=False)
with gr.Column() as EvalFiveDetail:
with gr.Row():
gr_submit_score = gr.Button("評価送信", interactive=False)
gr_score_reset = gr.Button("スコアリセット", interactive=False)
# Overall score (label marks it as required): range 1-5, default 3.
gr_score_detail = gr.Slider(label="総合スコア 【必須】",value=3, minimum=1, maximum=5, step=1, interactive=False)
gr_eval_annotation_explain = gr.Markdown("詳細アノテーション (5点満点)")
# Optional detail sliders: range 0-5, default 0 (0 = not judged, per the state comments above).
with gr.Row() :
gr_helpfulness = gr.Slider(label="有用性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_correctness = gr.Slider(label="正確さ", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_coherence = gr.Slider(label="一貫性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_complexity = gr.Slider(label="複雑さ", minimum=0, value=0, maximum=5, step=1, interactive=False)
with gr.Row() :
gr_verbosity = gr.Slider(label="冗長性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_humor = gr.Slider(label="ユーモア", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_creativity = gr.Slider(label="創造性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_appropriate = gr.Slider(label="適切性", minimum=0, value=0, maximum=5, step=1, interactive=False)
with gr.Row() :
gr_following_instructions = gr.Slider(label="忠実性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_politeness = gr.Slider(label="礼儀正しさ", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_harmfulness = gr.Slider(label="有害性", minimum=0, value=0, maximum=5, step=1, interactive=False)
gr_text_4 = gr.Markdown("判断可能な物のみ\n\nスコアを付けてください。")
gr_text_5 = gr.Markdown("")
# Presence radios: unknown / present / absent, default unknown; converted by checkbox_to_int on submit.
with gr.Row():
gr_typos = gr.Radio(label="誤字・脱字", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_moral_judgement = gr.Radio(label="非道徳", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_hate = gr.Radio(label="ヘイト", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_sexual = gr.Radio(label="性的内容", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_violence = gr.Radio(label="暴力的", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_suicide = gr.Radio(label="自殺行為", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_threat = gr.Radio(label="犯罪", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_gun = gr.Radio(label="銃等", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_controlled_substance = gr.Radio(label="規制対象物質", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_criminal_planing = gr.Radio(label="犯罪計画", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_privacy = gr.Radio(label="個人情報", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_harassment = gr.Radio(label="ハラスメント", choices=["不明", "有", "無"], value="不明", interactive=False)
with gr.Row():
gr_profanity = gr.Radio(label="冒涜行為", choices=["不明", "有", "無"], value="不明", interactive=False)
gr_political_content = gr.Radio(label="政治的内容", choices=["不明", "有", "無"], value="不明", interactive=False)
# Detailed 5-level evaluation: update the states, save, and show the next row
def eval_submit(
    # text
    g_answer_text_3_1: str,
    # sliders
    g_score: int,
    g_helpfulness: int,
    g_correctness: int,
    g_coherence: int,
    g_complexity: int,
    g_verbosity: int,
    g_humor: int,
    g_creativity: int,
    g_appropriate: int,
    g_following_instructions: int,
    g_politeness: int,
    g_harmfulness: int,
    # radios
    g_hate: str,
    g_sexual: str,
    g_violence: str,
    g_suicide: str,
    g_threat: str,
    g_gun: str,
    g_controlled_substance: str,
    g_criminal_planing: str,
    g_privacy: str,
    g_harassment: str,
    g_profanity: str,
    g_political_content: str,
    g_moral_judgement: str,
    g_typos: str,
):
    """Save the detailed evaluation of the displayed row and move to the next.

    Fix over the previous version: the annotation is saved *before*
    ``select_idx`` is advanced. Previously the index was incremented first,
    so every annotation was stored with the ``dataset_id`` of the following
    row rather than the row that was actually rated. The score widgets are
    also reset once instead of twice.

    Returns:
        32 values: question/answer updates for the three tabs (6), followed
        by the 26 reset values from ``score_reset_display``.
    """
    # Detailed mode does not use the good / bad / unknown flags.
    good.value = False
    bad.value = False
    unknown_quality.value = False

    # Record the answer text only when the annotator changed it.
    if initial_answer_text_1.value != g_answer_text_3_1:
        is_proofreading_1.value = True
        answer_text_1.value = g_answer_text_3_1
    else:
        is_proofreading_1.value = False
        answer_text_1.value = ""

    # Slider values are stored as-is.
    for state, slider_value in (
        (score, g_score),
        (helpfulness, g_helpfulness),
        (correctness, g_correctness),
        (coherence, g_coherence),
        (complexity, g_complexity),
        (verbosity, g_verbosity),
        (humor, g_humor),
        (creativity, g_creativity),
        (appropriate, g_appropriate),
        (following_instructions, g_following_instructions),
        (politeness, g_politeness),
        (harmfulness, g_harmfulness),
    ):
        state.value = slider_value

    # Radio labels are converted to their integer codes.
    for state, radio_label in (
        (hate, g_hate),
        (sexual, g_sexual),
        (violence, g_violence),
        (suicide, g_suicide),
        (threat, g_threat),
        (gun, g_gun),
        (controlled_substance, g_controlled_substance),
        (criminal_planing, g_criminal_planing),
        (privacy, g_privacy),
        (harassment, g_harassment),
        (profanity, g_profanity),
        (political_content, g_political_content),
        (moral_judgement, g_moral_judgement),
        (typos, g_typos),
    ):
        state.value = checkbox_to_int(radio_label)

    df = select_dataset.value["train"]

    # Save while select_idx still points at the row that was just rated.
    # save_annotation_data passes the states set above plus that row's
    # original "index" value to save_annotation.
    save_annotation_data(df)

    # Advance to the next row, wrapping around at the end of the dataset.
    select_idx.value += 1
    if select_idx.value >= len(df):
        select_idx.value = 0

    # Prepare edit detection for the next row and reset the score states.
    initialize_next_data(df)
    reset_values = score_reset_display()

    # Return values for the UI update.
    next_row = df.iloc[select_idx.value]
    question = next_row["question"]
    answer = next_row["answer"]
    return (
        gr.update(value=question),
        gr.update(value=answer),
        gr.update(value=question),
        gr.update(value=answer),
        gr.update(value=question),
        gr.update(value=answer),
        *reset_values,
    )
# Submit button: pass the edited answer, sliders and radios to eval_submit,
# then refresh the text boxes of all three tabs and reset every score widget.
gr_submit_score.click(
    fn=eval_submit,
    inputs=[
        gr_answer_text_3_1,
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
    # UI update
    outputs=[
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# Handler for a 5-level score button click
def score_button_clicked(button_value):
    """Store ``button_value`` as the score and clear the good/bad/unknown flags."""
    score.value = button_value
    # A 5-level score replaces any simple-mode judgement.
    for flag in (good, bad, unknown_quality):
        flag.value = False
# Shared click handler for the simple (good/bad/unknown) and 5-level buttons
def update_annotation(
    input_ans_1: str = None,
    input_ans_2: str = None,
    is_good: bool = None,  # flag for good/bad
    is_unknown: bool = None,  # flag for "unknown"
    is_simple: bool = None,
    score_value: int = None  # 5-level score; None for good/bad
) -> Tuple:
    """Save the evaluation of the displayed row and move to the next row.

    Fix over the previous version: the annotation is saved *before*
    ``select_idx`` is advanced. Previously the index was incremented first,
    so the stored ``dataset_id`` belonged to the following row instead of
    the row that was rated. The return annotation no longer claims five
    values; the function has always returned 32.

    Args:
        input_ans_1: Current text of the first answer box (None to skip).
        input_ans_2: Current text of the second answer box (None to skip).
        is_good: Good/bad flag, used when ``score_value`` is None.
        is_unknown: "Unknown" flag, used when ``score_value`` is None.
        is_simple: When truthy, the saved score is forced to 0.
        score_value: 5-level score, or None for a good/bad/unknown click.

    Returns:
        32 values: question/answer updates for the three tabs (6), followed
        by the 26 reset values from ``score_reset_display``.
    """
    # Update the good/bad/unknown flags and the score.
    update_evaluation_state(is_good, is_unknown, score_value)
    # Detect edits to the answer text (also done for 5-level clicks).
    update_answer_state(input_ans_1, input_ans_2)

    # A simple-mode click stores score 0.
    if is_simple:
        score.value = 0

    df = select_dataset.value["train"]

    # Save while select_idx still points at the row that was just rated, so
    # the original index (dataset_id) written is the one of that row.
    save_annotation_data(df)

    # Advance to the next row, wrapping around at the end of the dataset.
    select_idx.value += 1
    if select_idx.value >= len(df):
        select_idx.value = 0

    # Prepare edit detection for the next row and reset the score states.
    initialize_next_data(df)
    reset_values = score_reset_display()

    next_row = df.iloc[select_idx.value]
    question = next_row["question"]
    answer = next_row["answer"]
    return (
        gr.update(value=question),
        gr.update(value=answer),
        gr.update(value=question),
        gr.update(value=answer),
        gr.update(value=question),
        gr.update(value=answer),
        *reset_values,
    )
# Update the evaluation states
def update_evaluation_state(is_good, is_unknown, score_value):
    """Set the good/bad/unknown flags, or the score for a 5-level evaluation.

    With a ``score_value`` the three flags are cleared and the score stored.
    Without one, ``good`` takes ``is_good``, ``unknown_quality`` takes
    ``is_unknown``, and ``bad`` is the negation of ``is_good`` unless the
    evaluation is "unknown", in which case it is False.
    """
    if score_value is None:
        # good / bad / unknown evaluation
        good.value = is_good
        if is_unknown:
            bad.value = False
        else:
            bad.value = not is_good
        unknown_quality.value = is_unknown
        return

    # 5-level evaluation
    for flag in (good, bad, unknown_quality):
        flag.value = False
    score.value = score_value
# Detect edits and store the edited text
def update_answer_state(input_ans_1, input_ans_2):
    """Record which answer texts differ from the originally displayed ones.

    For each turn: if a text was supplied and differs from the stored initial
    text, the proofreading flag is set and the new text stored; otherwise the
    stored text is cleared to "" and the flag is left as it is.
    """
    turns = (
        (input_ans_1, initial_answer_text_1, is_proofreading_1, answer_text_1),
        # for the 2nd turn
        (input_ans_2, initial_answer_text_2, is_proofreading_2, answer_text_2),
    )
    for new_text, original, edited_flag, stored_text in turns:
        if new_text is None or not (original.value != new_text):
            stored_text.value = ""
            continue
        edited_flag.value = True
        stored_text.value = new_text
# Save data
def save_annotation_data(df):
    """Pass the current state values to ``save_annotation``.

    The dataset id is the ``index`` column of the row at ``select_idx`` in
    ``df``, i.e. the original index number kept when the data was shuffled.
    """
    selected_name = select_dropdown_dataset.value
    source_row_id = df.iloc[select_idx.value]['index']
    # States in the positional order save_annotation expects after the id.
    ordered_states = (
        who,
        unknown_quality, good, bad,
        is_proofreading_1, answer_text_1,
        is_proofreading_2, answer_text_2,
        score, helpfulness, correctness, coherence, complexity, verbosity,
        humor, creativity, appropriate, following_instructions,
        politeness, harmfulness,
        hate, sexual, violence, suicide, threat, gun,
        controlled_substance, criminal_planing, privacy, harassment,
        profanity, political_content, moral_judgement, typos,
    )
    save_annotation(
        selected_name,
        source_row_id,
        *[state.value for state in ordered_states],
    )
# Initialisation when the next row is shown
def initialize_next_data(df):
    """Clear the proofreading flags and remember the current row's answer.

    The answer of the row at ``select_idx`` becomes the reference text that
    later edits are compared against, for both turns.
    """
    for edited_flag in (is_proofreading_1, is_proofreading_2):
        edited_flag.value = False
    for reference_text in (initial_answer_text_1, initial_answer_text_2):
        reference_text.value = df.iloc[select_idx.value]["answer"]
# Cannot evaluate ("unknown")
def unknown_click(input_ans_1, input_ans_2):
    """Handle the "unknown" button: reset scores, then save as unknown."""
    for flag in (good, bad):
        flag.value = False
    score_reset_display()
    result = update_annotation(
        input_ans_1=input_ans_1, input_ans_2=input_ans_2,
        is_good=False, is_unknown=True, is_simple=True,
    )
    return result
# Click event for the "unknown" button: refresh the text boxes of all three
# tabs and reset every score widget.
gr_unknown_btn.click(
    fn=unknown_click,
    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
    outputs=[
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# "Good" click
def good_click(input_ans_1, input_ans_2):
    """Handle the "good" button: reset scores, then save as good."""
    score_reset_display()
    result = update_annotation(
        input_ans_1=input_ans_1, input_ans_2=input_ans_2,
        is_good=True, is_unknown=False, is_simple=True,
    )
    return result
# Click event for the "good" button: refresh the text boxes of all three
# tabs and reset every score widget.
gr_good_btn.click(
    fn=good_click,
    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
    outputs=[
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# "Bad" click
def bad_click(input_ans_1, input_ans_2):
    """Handle the "bad" button: reset scores, then save as bad."""
    score_reset_display()
    result = update_annotation(
        input_ans_1=input_ans_1, input_ans_2=input_ans_2,
        is_good=False, is_unknown=False, is_simple=True,
    )
    return result
# Click event for the "bad" button: refresh the text boxes of all three
# tabs and reset every score widget.
gr_bad_btn.click(
    fn=bad_click,
    inputs=[gr_answer_text_1_1, gr_answer_text_2_1],
    outputs=[
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# Click events for the 5-level score buttons
def _score_click_handler(score_value):
    """Return the click callback that saves ``score_value`` for the shown row."""
    # A factory is used so each lambda captures its own score value.
    return lambda x: update_annotation(
        input_ans_1=x, is_unknown=False, score_value=score_value)


# Registered in the order 1..5; every button reads the answer box of the
# simple 5-level tab and refreshes the same 32 components.
for _score_button, _score_value in (
    (gr_score_1_btn, 1),
    (gr_score_2_btn, 2),
    (gr_score_3_btn, 3),
    (gr_score_4_btn, 4),
    (gr_score_5_btn, 5),
):
    _score_button.click(
        _score_click_handler(_score_value),
        inputs=[gr_answer_text_2_1],
        outputs=[
            gr_question_text_1_1, gr_answer_text_1_1,
            gr_question_text_2_1, gr_answer_text_2_1,
            gr_question_text_3_1, gr_answer_text_3_1,
            gr_score_detail,
            gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
            gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
            gr_following_instructions, gr_politeness, gr_harmfulness,
            gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
            gr_controlled_substance, gr_criminal_planing, gr_privacy,
            gr_harassment, gr_profanity, gr_political_content,
            gr_moral_judgement, gr_typos,
        ],
    )
# Score reset
def score_reset_display():
    """Reset every score state and return the matching widget values.

    ``score`` goes back to 3 and all other rating states to 0.

    Returns:
        A 26-element tuple: 3 for the overall slider, eleven 0s for the
        detail sliders, and fourteen "不明" labels for the radios.
    """
    # gr.State
    score.value = 3
    zeroed_states = (
        helpfulness, correctness, coherence, complexity, verbosity,
        humor, creativity, appropriate, following_instructions,
        politeness, harmfulness,
        hate, sexual, violence, suicide, threat, gun,
        controlled_substance, criminal_planing, privacy, harassment,
        profanity, political_content, moral_judgement, typos,
    )
    for state in zeroed_states:
        state.value = 0

    slider_values = (3,) + (0,) * 11
    radio_values = ("不明",) * 14
    return slider_values + radio_values
# Score reset button: put every slider and radio back to its default value.
gr_score_reset.click(
    fn=score_reset_display,
    inputs=[],
    outputs=[
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# Data loading
gr_data_load_btn.click(
    fn=dataset_load_fn,
    inputs=None,
    # Set the texts, then make the controls interactive (UI update).
    outputs=[
        # question / answer text of the three tabs
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
        # the answer boxes appear a second time to receive the interactive update
        gr_answer_text_1_1, gr_answer_text_2_1, gr_answer_text_3_1,
        # simple-mode buttons
        gr_unknown_btn, gr_good_btn, gr_bad_btn,
        # 5-level buttons
        gr_score_5_btn, gr_score_4_btn, gr_score_3_btn,
        gr_score_2_btn, gr_score_1_btn,
        # detailed mode controls
        gr_submit_score, gr_score_detail, gr_score_reset,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ],
)
# TODO: show the list of already-annotated rows in a separate tab
# with gr.Tab("アノテ済みデータセット(管理画面)"):
# Refresh the displayed data when the tab is switched
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
demo.launch()