|
import json |
|
import datetime |
|
from pathlib import Path |
|
import uuid |
|
from typing import Tuple |
|
|
|
import pandas as pd |
|
|
|
import gradio as gr |
|
from datasets import load_dataset |
|
from huggingface_hub import CommitScheduler |
|
|
|
|
|
|
|
# Hub dataset repositories for collected annotations; the commit scheduler
# defined later in this file uploads to entry [1].
OUTPUT_DATASET = [
    "team-hatakeyama-phase2/annotation_tanuki_phase2",
    "kevineen/Tanuki-Phase2-annotation-dataset",
]

# Datasets offered in the dataset dropdown of the single-turn tab.
ANNOTATION_DATASET = [
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA",
    "hatakeyama-llm-team/AutoGeneratedJapaneseQA-other",
    "kanhatakeyama/ChatbotArenaJaMixtral8x22b",
    "kanhatakeyama/OrcaJaMixtral8x22b",
    "kanhatakeyama/LogicalDatasetsByMixtral8x22b",
]

# Multi-turn dataset names; not referenced by any code visible in this file.
MULTI_TURN_ANNOTATION_DATASET = ["kanhatakeyama/AutoMultiTurnByMixtral8x22b"]
|
|
|
|
|
|
|
|
|
# Dataset-selection state.
# NOTE(review): these gr.State objects are created at module level and are
# read/written through `.value` by the handlers in this file, so they behave
# as process-wide holders rather than per-session state -- confirm that this
# is intended if several annotators use the app at once.
is_selected_dataset = gr.State(value=False)
is_loaded_dataset = gr.State(value=False)

# Choices shown in the dataset dropdown.
dropdown_dataset_list = gr.State(value=ANNOTATION_DATASET)

# Currently selected dataset name (defaults to the first choice), the loaded
# dataset object, its row count, and the position of the row being shown.
select_dropdown_dataset = gr.State(value=dropdown_dataset_list.value[0])
select_dataset = gr.State(value=None)
select_dataset_total_len = gr.State(value=0)
select_idx = gr.State(value=0)
|
|
|
|
|
|
|
# In-memory log of every annotation saved so far: an empty DataFrame with one
# column per annotation field, in the order listed here.
annotated_dataset = gr.State(
    pd.DataFrame({
        column: []
        for column in (
            # identification
            'dataset_name', 'dataset_id', 'who',
            # simple good / bad / unknown verdict
            'unknown_quality', 'good', 'bad',
            # proofread answers
            'is_proofreading_1', 'answer_text_1',
            'is_proofreading_2', 'answer_text_2',
            # overall score and detail ratings
            'score', 'helpfulness', 'correctness', 'coherence',
            'complexity', 'verbosity', 'humor', 'creativity',
            'appropriate', 'following_instructions', 'politeness',
            'harmfulness',
            # content flags
            'typos', 'hate', 'sexual', 'violence', 'suicide', 'threat',
            'gun', 'controlled_substance', 'criminal_planing', 'privacy',
            'harassment', 'profanity', 'political_content',
            'moral_judgement',
        )
    })
)
|
|
|
# Set to False/True around loading in dataset_load_fn.
# NOTE(review): `is_loaded_dataset` defined earlier looks like a duplicate of
# this flag -- confirm which one is meant to be used.
is_dataset_loaded = gr.State(value=False)

# Identification of the current annotation.
you_dataset_id = gr.State(value=0)
dataset_name = gr.State(value="")
dataset_id = gr.State(value=0)
who = gr.State(value="")

# Simple verdict flags.
unknown_quality = gr.State(value=False)
good = gr.State(value=False)
bad = gr.State(value=False)

# Answer text as originally shown; compared with the edited text to detect
# proofreading.
initial_answer_text_1 = gr.State(value="")
initial_answer_text_2 = gr.State(value="")

# Proofreading result per answer slot.
is_proofreading_1 = gr.State(value=False)
answer_text_1 = gr.State(value="")
is_proofreading_2 = gr.State(value=False)
answer_text_2 = gr.State(value="")

# Overall score (default 3) and detail ratings (default 0).
score = gr.State(value=3)
helpfulness = gr.State(value=0)
correctness = gr.State(value=0)
coherence = gr.State(value=0)
complexity = gr.State(value=0)
verbosity = gr.State(value=0)
humor = gr.State(value=0)
creativity = gr.State(value=0)
appropriate = gr.State(value=0)
following_instructions = gr.State(value=0)
politeness = gr.State(value=0)
harmfulness = gr.State(value=0)

# Content flags, stored as the integer codes produced by checkbox_to_int.
hate = gr.State(value=0)
sexual = gr.State(value=0)
violence = gr.State(value=0)
suicide = gr.State(value=0)
threat = gr.State(value=0)
gun = gr.State(value=0)
controlled_substance = gr.State(value=0)
criminal_planing = gr.State(value=0)
privacy = gr.State(value=0)
harassment = gr.State(value=0)
profanity = gr.State(value=0)
political_content = gr.State(value=0)
moral_judgement = gr.State(value=0)
typos = gr.State(value=0)
|
|
|
|
|
|
|
|
|
def dataset_load_fn() -> Tuple[(str,) * 6 + (gr.update,) * 39]:
    """Load the selected dataset, shuffle it and show its first row.

    Reads the dataset name from ``select_dropdown_dataset``, loads it with
    ``load_dataset``, converts the ``"train"`` split to a pandas DataFrame,
    keeps the original row number in an ``index`` column, shuffles the rows
    and stores the frame back under ``select_dataset.value["train"]``.

    Returns:
        A 45-tuple for the outputs wired to the load button: the first row's
        question and answer repeated for the three annotation tabs, followed
        by 39 ``gr.update(interactive=True)`` values.
    """
    is_dataset_loaded.value = False

    select_dataset.value = load_dataset(select_dropdown_dataset.value)

    # reset_index(drop=False) preserves the pre-shuffle position as the
    # 'index' column, which is later saved as the annotation's dataset_id.
    frame = select_dataset.value["train"].to_pandas()
    frame = frame.reset_index(drop=False)
    frame = frame.sample(frac=1).reset_index(drop=True)
    select_dataset.value["train"] = frame

    select_idx.value = 0
    select_dataset_total_len.value = len(frame)
    is_dataset_loaded.value = True

    first_row = frame.iloc[select_idx.value]
    question = first_row["question"]
    answer = first_row["answer"]

    # Baseline used later to detect whether the annotator edited the answer.
    initial_answer_text_1.value = answer
    initial_answer_text_2.value = answer

    texts = (question, answer) * 3
    enable_controls = tuple(gr.update(interactive=True) for _ in range(39))
    return texts + enable_controls
|
|
|
|
|
|
|
|
|
|
|
|
|
# Each process writes its annotations to its own uniquely named JSON-lines
# file inside the folder watched by the commit scheduler.
annotated_folder = Path("user_annotation/")
annotation_file = annotated_folder / f"data_{uuid.uuid4()}.json"

# Background uploader for the annotation folder.
# NOTE(review): `every=5` is presumably minutes, per the huggingface_hub
# CommitScheduler documentation -- confirm.
scheduler = CommitScheduler(
    repo_id=OUTPUT_DATASET[1],
    repo_type="dataset",
    folder_path=annotated_folder,
    path_in_repo="data",
    private=True,
    every=5,
)
|
|
|
|
|
def checkbox_to_int(checkbox_value) -> int:
    """Convert a content-flag radio choice into its integer code.

    Args:
        checkbox_value: The selected radio label.

    Returns:
        0 for "不明" (unknown), 1 for "有" (present), 2 for "無" (absent).
        Any other value also maps to 0, and a diagnostic naming the
        unexpected value is printed.
    """
    # Equality comparison (not a dict lookup) so unhashable values are
    # handled the same way as any other unexpected value.
    for label, code in (("不明", 0), ("有", 1), ("無", 2)):
        if checkbox_value == label:
            return code

    # Previously only "error: " was printed, with no hint of what was wrong.
    print(f"error: unexpected radio value {checkbox_value!r}")
    return 0
|
|
|
|
|
def save_annotation(
    dataset_name: str,
    dataset_id: int,
    who: str,
    unknown_quality: bool,
    good: bool,
    bad: bool,
    is_proofreading_1: bool,
    answer_text_1: str,
    is_proofreading_2: bool,
    answer_text_2: str,
    score: int,
    helpfulness: int,
    correctness: int,
    coherence: int,
    complexity: int,
    verbosity: int,
    humor: int,
    creativity: int,
    appropriate: int,
    following_instructions: int,
    politeness: int,
    harmfulness: int,
    hate: int,
    sexual: int,
    violence: int,
    suicide: int,
    threat: int,
    gun: int,
    controlled_substance: int,
    criminal_planing: int,
    privacy: int,
    harassment: int,
    profanity: int,
    political_content: int,
    moral_judgement: int,
    typos: int,
) -> None:
    """Record one annotation in memory and append it to the upload file.

    The annotation is appended as a row to ``annotated_dataset`` and written
    as one JSON line (prefixed with a ``datetime`` field) to
    ``annotation_file`` while holding ``scheduler.lock``.

    Both outputs are built from the same record, so they carry the same
    fields. The in-memory row used to omit ``coherence`` and stored ``typos``
    under the misspelt key ``types``.

    Args:
        dataset_name: Name of the dataset the row came from.
        dataset_id: Row identifier within that dataset.
        who: Annotator name.
        unknown_quality, good, bad: Simple verdict flags.
        is_proofreading_1, answer_text_1: Whether answer slot 1 was edited,
            and the edited text.
        is_proofreading_2, answer_text_2: Same for answer slot 2.
        score: Overall score.
        helpfulness ... harmfulness: Detail ratings.
        hate ... typos: Content-flag codes.
    """
    record = {
        "dataset_name": dataset_name,
        "dataset_id": dataset_id,
        "who": who,
        "unknown_quality": unknown_quality,
        "good": good,
        "bad": bad,
        "is_proofreading_1": is_proofreading_1,
        "answer_text_1": answer_text_1,
        "is_proofreading_2": is_proofreading_2,
        "answer_text_2": answer_text_2,
        "score": score,
        "helpfulness": helpfulness,
        "correctness": correctness,
        "coherence": coherence,
        "complexity": complexity,
        "verbosity": verbosity,
        "humor": humor,
        "creativity": creativity,
        "appropriate": appropriate,
        "following_instructions": following_instructions,
        "politeness": politeness,
        "harmfulness": harmfulness,
        "hate": hate,
        "sexual": sexual,
        "violence": violence,
        "suicide": suicide,
        "threat": threat,
        "gun": gun,
        "controlled_substance": controlled_substance,
        "criminal_planing": criminal_planing,
        "privacy": privacy,
        "harassment": harassment,
        "profanity": profanity,
        "political_content": political_content,
        "moral_judgement": moral_judgement,
        "typos": typos,
    }

    # ignore_index=True already renumbers the rows 0..n-1.
    annotated_dataset.value = pd.concat(
        [annotated_dataset.value, pd.DataFrame([record])],
        ignore_index=True,
    )

    # Text and boolean fields are written as-is; every other field is cast to
    # a plain int so values such as numpy integers are JSON-serialisable.
    written_as_is = {
        "dataset_name", "who", "unknown_quality", "good", "bad",
        "is_proofreading_1", "answer_text_1",
        "is_proofreading_2", "answer_text_2",
    }
    data_to_write = {"datetime": datetime.datetime.now().isoformat()}
    for key, value in record.items():
        data_to_write[key] = value if key in written_as_is else int(value)

    # Hold the scheduler lock so an upload never sees a half-written line.
    with scheduler.lock:
        with annotation_file.open("a", encoding='utf-8') as f:
            f.write(json.dumps(data_to_write, ensure_ascii=False))
            f.write("\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def hello(profile: gr.OAuthProfile | None) -> Tuple[str, str]:
    """Build a greeting for the visitor and record their username.

    Args:
        profile: OAuth profile of the visitor, or None when not logged in.

    Returns:
        ``(message, annotator_name)``. When ``profile`` is None the message
        asks the visitor to log in and the stored name is left unchanged;
        otherwise ``who`` is set to the profile's username first.
    """
    if profile is not None:
        who.value = profile.username
        message = f'{profile.username} さん、よろしくお願いいたします。'
    else:
        message = "プライベートデータセット取得のためにログインしてください。"
    return message, who.value
|
|
|
|
|
|
|
# Theme passed to gr.Blocks when the UI is built.
theme_ = gr.themes.Default()
|
|
|
|
|
def load_css():
    """Return the contents of ``style.css`` from the working directory.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default encoding (the file's other text I/O also uses
    UTF-8).

    Raises:
        FileNotFoundError: If ``style.css`` does not exist.
    """
    return Path("style.css").read_text(encoding="utf-8")
|
|
|
|
|
with gr.Blocks(theme=theme_, css=load_css()) as demo:
    # NOTE(review): this file was received with its indentation flattened, so
    # the container nesting below (which component sits in which Row / Column
    # / Tab) is a reconstruction -- confirm it against the rendered UI.

    def _rating_slider(label: str):
        """Create one disabled 0-5 detail-rating slider."""
        return gr.Slider(label=label, minimum=0, value=0, maximum=5, step=1, interactive=False)

    def _flag_radio(label: str):
        """Create one disabled unknown / present / absent content-flag radio."""
        return gr.Radio(label=label, choices=["不明", "有", "無"], value="不明", interactive=False)

    # ------------------------------------------------------------------
    # Layout
    # ------------------------------------------------------------------
    gr.Markdown("# データセット アノテーション for Tanuki (Phase2)")

    with gr.Row():
        gr.Markdown(
            "### GENIACにて開発中のLLM用データセットに対してアノテーションするSpaceです\n "
            "入力されたデータは使用される可能性があるため、個人情報・秘匿情報などは入力しないでください。\n "
            "テスト中です。"
        )
        gr_who = gr.TextArea(value=who.value, lines=1, max_lines=1, label="ユーザー名 (入力してください)")

    def change_name(name: str):
        """Store the annotator name typed into the user-name box."""
        who.value = name

    gr_who.change(
        change_name,
        inputs=[gr_who],
        outputs=[]
    )

    with gr.Tab("アノテーション (シングルターン)"):

        with gr.Row():

            def dropdown_select(select_value) -> None:
                """Remember which dataset name is selected in the dropdown."""
                select_dropdown_dataset.value = select_value

            gr_dropdown_dataset = gr.Dropdown(
                label="データセット選択 ①",
                choices=dropdown_dataset_list.value,
                value=select_dropdown_dataset.value,
                elem_id="dataset_sel",
                scale=2)

            gr_dropdown_dataset.change(
                dropdown_select,
                inputs=[gr_dropdown_dataset]
            )

            gr_data_load_btn = gr.Button("② データセットを読み込む")

        with gr.Column() as content_column:

            # Tab 1: good / unknown / bad verdict.
            with gr.Tab("③ シンプル(良・悪)"):
                with gr.Column():
                    with gr.Row(equal_height=True):
                        gr_good_btn = gr.Button("良い", interactive=False)
                        gr_unknown_btn = gr.Button("分からない", interactive=False)
                        gr_bad_btn = gr.Button("悪い", interactive=False)

                    gr_question_text_1_1 = gr.Textbox(
                        label="質問: ", lines=5, interactive=False)

                    gr_answer_text_1_1 = gr.Textbox(
                        label="回答: 訂正して頂けると、品質が上がります。",
                        lines=20,
                        interactive=False)

            # Tab 2: one-click 1-5 score.
            with gr.Tab("③ 5段階評価(シンプル)"):

                with gr.Row() as simple_score_btn:
                    gr_score_5_btn = gr.Button("5: 高品質", interactive=False)
                    gr_score_4_btn = gr.Button("4: 良い", interactive=False)
                    gr_score_3_btn = gr.Button("3: 普通", interactive=False)
                    gr_score_2_btn = gr.Button("2: 悪い", interactive=False)
                    gr_score_1_btn = gr.Button("1: 低品質", interactive=False)

                gr_question_text_2_1 = gr.Textbox(
                    label="質問: ", lines=5, interactive=False)

                gr_answer_text_2_1 = gr.Textbox(
                    label="回答: 訂正して頂けると、品質が上がります。", lines=20, interactive=False)

            # Tab 3: overall score plus detail ratings and content flags.
            with gr.Tab("③ 5段階評価 (詳細)"):

                with gr.Row():

                    with gr.Column() as EvalFive:

                        gr_question_text_3_1 = gr.Textbox(
                            label="質問: ", lines=10, interactive=False)

                        gr_answer_text_3_1 = gr.Textbox(
                            label="回答: 訂正して頂けると、品質が上がります。", lines=35, interactive=False)

                    with gr.Column() as EvalFiveDetail:

                        with gr.Row():
                            gr_submit_score = gr.Button("評価送信", interactive=False)
                            gr_score_reset = gr.Button("スコアリセット", interactive=False)

                        gr_score_detail = gr.Slider(label="総合スコア 【必須】", value=3, minimum=1, maximum=5, step=1, interactive=False)
                        gr_eval_annotation_explain = gr.Markdown("詳細アノテーション (5点満点)")

                        with gr.Row():
                            gr_helpfulness = _rating_slider("有用性")
                            gr_correctness = _rating_slider("正確さ")
                            gr_coherence = _rating_slider("一貫性")
                            gr_complexity = _rating_slider("複雑さ")

                        with gr.Row():
                            gr_verbosity = _rating_slider("冗長性")
                            gr_humor = _rating_slider("ユーモア")
                            gr_creativity = _rating_slider("創造性")
                            gr_appropriate = _rating_slider("適切性")

                        with gr.Row():
                            gr_following_instructions = _rating_slider("忠実性")
                            gr_politeness = _rating_slider("礼儀正しさ")
                            gr_harmfulness = _rating_slider("有害性")
                            gr_text_4 = gr.Markdown("判断可能な物のみ\n\nスコアを付けてください。")

                        gr_text_5 = gr.Markdown("")

                        with gr.Row():
                            gr_typos = _flag_radio("誤字・脱字")
                            gr_moral_judgement = _flag_radio("非道徳")

                        with gr.Row():
                            gr_hate = _flag_radio("ヘイト")
                            gr_sexual = _flag_radio("性的内容")

                        with gr.Row():
                            gr_violence = _flag_radio("暴力的")
                            gr_suicide = _flag_radio("自殺行為")

                        with gr.Row():
                            gr_threat = _flag_radio("犯罪")
                            gr_gun = _flag_radio("銃等")

                        with gr.Row():
                            gr_controlled_substance = _flag_radio("規制対象物質")
                            gr_criminal_planing = _flag_radio("犯罪計画")

                        with gr.Row():
                            gr_privacy = _flag_radio("個人情報")
                            gr_harassment = _flag_radio("ハラスメント")

                        with gr.Row():
                            gr_profanity = _flag_radio("冒涜行為")
                            gr_political_content = _flag_radio("政治的内容")

    # ------------------------------------------------------------------
    # Shared component lists (previously copy-pasted into every event)
    # ------------------------------------------------------------------
    # Question/answer boxes of the three tabs, in handler return order.
    qa_text_outputs = [
        gr_question_text_1_1, gr_answer_text_1_1,
        gr_question_text_2_1, gr_answer_text_2_1,
        gr_question_text_3_1, gr_answer_text_3_1,
    ]
    # Overall score, detail sliders and flag radios, in the order returned by
    # score_reset_display() and accepted by eval_submit().
    score_outputs = [
        gr_score_detail,
        gr_helpfulness, gr_correctness, gr_coherence, gr_complexity,
        gr_verbosity, gr_humor, gr_creativity, gr_appropriate,
        gr_following_instructions, gr_politeness, gr_harmfulness,
        gr_hate, gr_sexual, gr_violence, gr_suicide, gr_threat, gr_gun,
        gr_controlled_substance, gr_criminal_planing, gr_privacy,
        gr_harassment, gr_profanity, gr_political_content,
        gr_moral_judgement, gr_typos,
    ]
    # Everything refreshed after an annotation is submitted (6 + 26 = 32).
    annotation_outputs = qa_text_outputs + score_outputs

    # ------------------------------------------------------------------
    # Handlers
    # ------------------------------------------------------------------
    def score_reset_display():
        """Reset the stored scores/flags and return the widget defaults.

        Returns:
            26 values matching ``score_outputs``: overall score 3, eleven
            detail ratings of 0, and fourteen "不明" radio choices.
        """
        score.value = 3
        for state in (
            helpfulness, correctness, coherence, complexity, verbosity,
            humor, creativity, appropriate, following_instructions,
            politeness, harmfulness,
            hate, sexual, violence, suicide, threat, gun,
            controlled_substance, criminal_planing, privacy, harassment,
            profanity, political_content, moral_judgement, typos,
        ):
            state.value = 0

        return (3,) + (0,) * 11 + ("不明",) * 14

    def initialize_next_data(df):
        """Reset proofreading flags and baseline answers for the current row."""
        is_proofreading_1.value = False
        is_proofreading_2.value = False
        current_answer = df.iloc[select_idx.value]["answer"]
        initial_answer_text_1.value = current_answer
        initial_answer_text_2.value = current_answer

    def save_annotation_data(df):
        """Save the stored annotation state for the row at ``select_idx``."""
        save_annotation(
            select_dropdown_dataset.value,
            # 'index' holds the row's position before the dataset was shuffled.
            df.iloc[select_idx.value]['index'],
            who.value,
            unknown_quality.value,
            good.value,
            bad.value,
            is_proofreading_1.value,
            answer_text_1.value,
            is_proofreading_2.value,
            answer_text_2.value,
            score.value,
            helpfulness.value,
            correctness.value,
            coherence.value,
            complexity.value,
            verbosity.value,
            humor.value,
            creativity.value,
            appropriate.value,
            following_instructions.value,
            politeness.value,
            harmfulness.value,
            hate.value,
            sexual.value,
            violence.value,
            suicide.value,
            threat.value,
            gun.value,
            controlled_substance.value,
            criminal_planing.value,
            privacy.value,
            harassment.value,
            profanity.value,
            political_content.value,
            moral_judgement.value,
            typos.value,
        )

    def _advance_to_next_row(df):
        """Move ``select_idx`` to the next row, wrapping to 0 at the end."""
        select_idx.value += 1
        if select_idx.value >= len(df):
            select_idx.value = 0
        initialize_next_data(df)

    def _next_item_updates(df):
        """Return the 32 values for ``annotation_outputs`` for the current row."""
        row = df.iloc[select_idx.value]
        texts = tuple(
            gr.update(value=row[column])
            for _ in range(3)
            for column in ("question", "answer")
        )
        return texts + tuple(score_reset_display())

    def eval_submit(
        g_answer_text_3_1: str,
        g_score: int,
        g_helpfulness: int,
        g_correctness: int,
        g_coherence: int,
        g_complexity: int,
        g_verbosity: int,
        g_humor: int,
        g_creativity: int,
        g_appropriate: int,
        g_following_instructions: int,
        g_politeness: int,
        g_harmfulness: int,
        g_hate: str,
        g_sexual: str,
        g_violence: str,
        g_suicide: str,
        g_threat: str,
        g_gun: str,
        g_controlled_substance: str,
        g_criminal_planing: str,
        g_privacy: str,
        g_harassment: str,
        g_profanity: str,
        g_political_content: str,
        g_moral_judgement: str,
        g_typos: str,
    ):
        """Save a detailed evaluation of the displayed row and show the next.

        Args:
            g_answer_text_3_1: Answer text from the detail tab (possibly
                edited by the annotator).
            g_score: Overall score slider value.
            g_helpfulness ... g_harmfulness: Detail rating slider values.
            g_hate ... g_typos: Content-flag radio labels.

        Returns:
            32 values matching ``annotation_outputs``.
        """
        good.value = False
        bad.value = False
        unknown_quality.value = False

        # Record the answer only when it differs from what was displayed.
        edited = initial_answer_text_1.value != g_answer_text_3_1
        is_proofreading_1.value = edited
        answer_text_1.value = g_answer_text_3_1 if edited else ""
        # Only one answer box is passed to this handler; clear slot 2 so text
        # left over from an earlier simple-tab submission is not saved again.
        is_proofreading_2.value = False
        answer_text_2.value = ""

        for state, rating in (
            (score, g_score),
            (helpfulness, g_helpfulness),
            (correctness, g_correctness),
            (coherence, g_coherence),
            (complexity, g_complexity),
            (verbosity, g_verbosity),
            (humor, g_humor),
            (creativity, g_creativity),
            (appropriate, g_appropriate),
            (following_instructions, g_following_instructions),
            (politeness, g_politeness),
            (harmfulness, g_harmfulness),
        ):
            state.value = rating

        for state, choice in (
            (hate, g_hate),
            (sexual, g_sexual),
            (violence, g_violence),
            (suicide, g_suicide),
            (threat, g_threat),
            (gun, g_gun),
            (controlled_substance, g_controlled_substance),
            (criminal_planing, g_criminal_planing),
            (privacy, g_privacy),
            (harassment, g_harassment),
            (profanity, g_profanity),
            (political_content, g_political_content),
            (moral_judgement, g_moral_judgement),
            (typos, g_typos),
        ):
            state.value = checkbox_to_int(choice)

        df = select_dataset.value["train"]

        # Save BEFORE advancing: the index used to be incremented first, so
        # every annotation was stored under the next row's dataset_id.
        save_annotation_data(df)
        _advance_to_next_row(df)

        return _next_item_updates(df)

    def score_button_clicked(button_value):
        """Store a plain 1-5 score and clear the simple verdict flags.

        Not wired to any event in the code visible here.
        """
        good.value = False
        bad.value = False
        unknown_quality.value = False
        score.value = button_value

    def update_evaluation_state(is_good, is_unknown, score_value):
        """Store either a numeric score or a good/bad/unknown verdict."""
        if score_value is not None:
            good.value = False
            bad.value = False
            unknown_quality.value = False
            score.value = score_value
        else:
            good.value = is_good
            # "bad" only applies when the annotator gave a definite verdict.
            bad.value = not is_good if not is_unknown else False
            unknown_quality.value = is_unknown

    def update_answer_state(input_ans_1, input_ans_2):
        """Record each answer box's text if it differs from the baseline."""
        if input_ans_1 is not None and initial_answer_text_1.value != input_ans_1:
            is_proofreading_1.value = True
            answer_text_1.value = input_ans_1
        else:
            answer_text_1.value = ""

        if input_ans_2 is not None and initial_answer_text_2.value != input_ans_2:
            is_proofreading_2.value = True
            answer_text_2.value = input_ans_2
        else:
            answer_text_2.value = ""

    def update_annotation(
        input_ans_1: str = None,
        input_ans_2: str = None,
        is_good: bool = None,
        is_unknown: bool = None,
        is_simple: bool = None,
        score_value: int = None
    ) -> tuple:
        """Save a simple or one-click-score annotation and show the next row.

        Args:
            input_ans_1: Text of the first answer box, or None.
            input_ans_2: Text of the second answer box, or None.
            is_good: Verdict for the good/bad buttons.
            is_unknown: True when the annotator chose "unknown".
            is_simple: True for the good/bad/unknown buttons; the saved score
                is then 0.
            score_value: 1-5 score for the one-click score buttons.

        Returns:
            32 values matching ``annotation_outputs``.
        """
        update_evaluation_state(is_good, is_unknown, score_value)
        update_answer_state(input_ans_1, input_ans_2)

        if is_simple:
            score.value = 0

        df = select_dataset.value["train"]

        # Save BEFORE advancing so the record carries the id of the row that
        # was actually displayed (it used to carry the next row's id).
        save_annotation_data(df)
        _advance_to_next_row(df)

        return _next_item_updates(df)

    def unknown_click(input_ans_1, input_ans_2):
        """Handle the "unknown" button of the simple tab."""
        # Clear detail scores/flags so they are saved at their defaults.
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=False,
            is_unknown=True,
            is_simple=True,
        )

    def good_click(input_ans_1, input_ans_2):
        """Handle the "good" button of the simple tab."""
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=True,
            is_unknown=False,
            is_simple=True,
        )

    def bad_click(input_ans_1, input_ans_2):
        """Handle the "bad" button of the simple tab."""
        score_reset_display()
        return update_annotation(
            input_ans_1=input_ans_1,
            input_ans_2=input_ans_2,
            is_good=False,
            is_unknown=False,
            is_simple=True,
        )

    def _score_click_handler(score_value: int):
        """Build the click handler for one of the 1-5 score buttons."""
        def handler(answer_text):
            return update_annotation(
                input_ans_1=answer_text, is_unknown=False, score_value=score_value)
        return handler

    # ------------------------------------------------------------------
    # Event wiring (same registration order as before)
    # ------------------------------------------------------------------
    gr_submit_score.click(
        eval_submit,
        inputs=[gr_answer_text_3_1] + score_outputs,
        outputs=annotation_outputs,
    )

    simple_tab_answers = [gr_answer_text_1_1, gr_answer_text_2_1]
    gr_unknown_btn.click(unknown_click, inputs=simple_tab_answers, outputs=annotation_outputs)
    gr_good_btn.click(good_click, inputs=simple_tab_answers, outputs=annotation_outputs)
    gr_bad_btn.click(bad_click, inputs=simple_tab_answers, outputs=annotation_outputs)

    for _score_value, _score_button in (
        (1, gr_score_1_btn),
        (2, gr_score_2_btn),
        (3, gr_score_3_btn),
        (4, gr_score_4_btn),
        (5, gr_score_5_btn),
    ):
        _score_button.click(
            _score_click_handler(_score_value),
            inputs=[gr_answer_text_2_1],
            outputs=annotation_outputs,
        )

    gr_score_reset.click(
        score_reset_display,
        inputs=[],
        outputs=score_outputs,
    )

    # dataset_load_fn returns 6 texts followed by 39 interactive=True
    # updates; the 39 targets are listed after the question/answer boxes.
    gr_data_load_btn.click(
        dataset_load_fn,
        inputs=None,
        outputs=qa_text_outputs + [
            gr_answer_text_1_1,
            gr_answer_text_2_1,
            gr_answer_text_3_1,
            gr_unknown_btn,
            gr_good_btn,
            gr_bad_btn,
            gr_score_5_btn,
            gr_score_4_btn,
            gr_score_3_btn,
            gr_score_2_btn,
            gr_score_1_btn,
            gr_submit_score,
            gr_score_detail,
            gr_score_reset,
        ] + score_outputs[1:],
    )
|
|
|
|
|
|
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()