commit-labeling / app.py
Petr Tsvetkov
Force the light theme; add requirements.txt
6f224a6
raw
history blame
11.3 kB
import os
import random
import uuid
from datetime import datetime
from itertools import chain
import gradio as gr
from data_loader import load_data
# Access token and target dataset name are taken from the environment;
# both are None when the corresponding variable is unset.
HF_TOKEN = os.getenv('HF_TOKEN')
HF_DATASET = os.getenv('HF_DATASET')

# Number of input widgets in every per-variant form (checked with an assert
# where the forms are built).
N_QUESTIONS = 5

# load_data() is provided by the project-local data_loader module.
data, models = load_data()
n_samples, n_models = len(data), len(models)

# Receives one record per submitted form (see saver.setup / saver.flag below).
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def convert_diff_to_unified(diff):
    """Render a list of per-file modifications as a single diff string.

    Every item of ``diff`` is indexed by the keys ``old_path``, ``new_path``
    and ``diff``. For each item a ``--- <old_path>`` / ``+++ <new_path>``
    header pair is emitted, followed by the item's diff text. Items are
    separated by a single newline; an empty ``diff`` yields ``""``.
    """
    chunks = []
    for file_mod in diff:
        old_header = f'--- {file_mod["old_path"]}'
        new_header = f'+++ {file_mod["new_path"]}'
        chunks.append(f'{old_header}\n{new_header}\n{file_mod["diff"]}')
    return "\n".join(chunks)
def get_diff2html_view(raw_diff):
    """Return the HTML fragment that carries ``raw_diff`` for rendering.

    The fragment is a scrollable wrapper containing a hidden ``diff-raw``
    element holding the diff text and an empty ``diff-view`` element inside
    a ``d2h-view-wrapper`` container.

    NOTE(review): ``raw_diff`` is interpolated without HTML escaping. Whether
    escaping is needed depends on how the page script (not visible here)
    reads ``diff-raw`` - confirm before changing.
    """
    return (
        "\n"
        "<div style='width:100%; height:1400px; overflow:auto; position: relative'>\n"
        f"<div id='diff-raw' hidden>{raw_diff}</div>\n"
        '<div class="d2h-view-wrapper">\n'
        "<div id='diff-view'></div>\n"
        "</div>\n"
        "</div>\n"
    )
def get_github_link_md(repo, hash):
    """Return a Markdown link to commit ``hash`` of ``repo`` on github.com."""
    commit_url = f'https://github.com/{repo}/commit/{hash}'
    return f'[See the commit on Github]({commit_url})'
def update_commit_view(sample_ind):
    """Build the output values that display the sample at ``sample_ind``.

    Returns ``None`` when ``sample_ind`` is not below ``n_samples``.
    Otherwise returns a flat tuple laid out as: GitHub link markdown, diff
    HTML, repo, hash, ISO timestamp taken while building the view, the
    ``n_forms_submitted`` object, then one commit message per model followed
    by the model names - both in the same freshly shuffled order.
    """
    if sample_ind >= n_samples:
        return None

    sample = data[sample_ind]
    diff_html = get_diff2html_view(convert_diff_to_unified(sample['mods']))
    repo_name, commit_hash = sample['repo'], sample['hash']
    link_md = get_github_link_md(repo_name, commit_hash)
    loaded_at = datetime.now().isoformat()

    # Shuffle a copy so the module-level model list keeps its order.
    order = models[:]
    random.shuffle(order)
    messages = [sample[model_name] for model_name in order]

    # NOTE(review): the module-level n_forms_submitted object itself is
    # returned for its own output slot; presumably this resets the counter
    # to its initial value - confirm against Gradio's update semantics.
    return (
        link_md,
        diff_html,
        repo_name,
        commit_hash,
        loaded_at,
        n_forms_submitted,
        *messages,
        *order,
    )
def reset_answers():
    """Return one ``None`` per question widget across all model forms."""
    total_inputs = N_QUESTIONS * n_models
    return tuple(None for _ in range(total_inputs))
def reset_submit_buttons():
    """Return a new, enabled "Submit" button for each of the ``n_models`` forms."""
    buttons = []
    for _ in range(n_models):
        buttons.append(gr.Button(value="Submit", interactive=True))
    return tuple(buttons)
def reset_continue_button():
    """Return the continue button in its initial state.

    The button is disabled and its caption reports that zero of the
    ``n_models`` forms have been submitted.
    """
    progress_caption = f"0/{n_models} forms submitted"
    return gr.Button(value=progress_caption, interactive=False)
def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample and build the refreshed UI state.

    Args:
        current_sample_ind: position, within ``shuffled_idx``, of the sample
            currently shown.
        shuffled_idx: the per-session order of sample indices.

    Returns:
        ``None`` when there is no further sample to show. Otherwise a flat
        tuple: the new position, the commit-view values, cleared answers,
        reset submit buttons and the reset continue button.
    """
    next_ind = current_sample_ind + 1

    # Guarding only against ``current_sample_ind == n_samples`` let the call
    # made from the last sample index one past the end of ``shuffled_idx``.
    # Check the position we are about to read instead.
    if next_ind >= n_samples or next_ind >= len(shuffled_idx):
        return None

    updated_view = update_commit_view(shuffled_idx[next_ind])
    if updated_view is None:
        # update_commit_view signals "nothing to show" with None, which
        # cannot be concatenated into the result tuple.
        return None

    return (
        (next_ind,)
        + updated_view
        + reset_answers()
        + reset_submit_buttons()
        + (reset_continue_button(),)
    )
# Extra markup for the page <head>; passed to gr.Blocks(head=...) below.
# The encoding is explicit so the read does not depend on the host locale.
with open("head.html", encoding="utf-8") as head_file:
    head_html = head_file.read()

# JavaScript passed to gr.Blocks(js=...): unless the URL already carries
# ``__theme=light``, it sets that query parameter and navigates to the
# resulting URL.
force_light_theme_js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'light') {
url.searchParams.set('__theme', 'light');
window.location.href = url.href;
}
}
"""
# NOTE(review): the nesting of the layout containers below was reconstructed
# during review - confirm it matches the intended page layout.
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css",
               js=force_light_theme_js_func) as application:
    # Invisible components that carry per-sample / per-session state.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        with gr.Accordion("Help"):
            with open("survey_guide.md", encoding="utf-8") as content_file:
                gr.Markdown(content_file.read())

    with gr.Row():
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.HTML()

        with gr.Column(scale=1):
            # Parallel lists, one entry per model variant.
            commit_msgs = []
            questions = []
            model_names = []
            submit_buttons = []

            SCALE = list(range(1, 6))

            # (label, info) of every 1-5 rating question, in display order.
            RATING_QUESTIONS = [
                ('is_correct',
                 'The information provided in the commit message is consistent with the code changes.'),
                ('is_not_verbose',
                 'The commit message cannot be substantially shortened without loss of important '
                 'information.'),
                ('easy_to_read',
                 'The commit message is easy to read and to understand.'),
                ('overall_rating',
                 'Please, describe your overall impression of the commit message (1 - very bad, 5 - very '
                 'good)'),
            ]

            for model_ind in range(n_models):
                with gr.Tab(f"Variant #{model_ind + 1}"):
                    commit_msgs.append(gr.TextArea(label="Commit message (can be scrollable)",
                                                   interactive=False,
                                                   ))
                    gr.Markdown("## Please, rate your level of agreement with each statement\n"
                                "\n"
                                "*1 - strongly disagree, 2 - disagree, 3 - not sure, 4 - agree, 5 - strongly agree*")

                    model_questions = [
                        gr.Radio(info=question_info,
                                 label=question_label,
                                 show_label=False,
                                 choices=SCALE,
                                 interactive=True)
                        for question_label, question_info in RATING_QUESTIONS
                    ]
                    model_questions.append(gr.Textbox(
                        info='Additional comments on the commit message',
                        label='comments',
                        show_label=False,
                        interactive=True))

                    # Keeps the form in sync with reset_answers(), which
                    # emits N_QUESTIONS values per model.
                    assert len(model_questions) == N_QUESTIONS

                    questions.append(model_questions)
                    model_names.append(gr.Textbox(interactive=False, label='model', visible=False))
                    submit_buttons.append(gr.Button(value="Submit"))

            n_forms_submitted = gr.Number(visible=False, value=0, precision=0)
            continue_btn = reset_continue_button()

            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')

            with gr.Row(visible=False):
                sample_loaded_timestamp = gr.Textbox(info="Sample loaded", label='loaded_ts', interactive=False,
                                                     container=True, show_label=False)
                # Re-evaluated every second, so a submission reads a current timestamp.
                sample_submitted_timestamp = gr.Textbox(info="Current time",
                                                        interactive=False, container=True, show_label=False,
                                                        value=lambda: datetime.now().isoformat(), every=1.0,
                                                        label='submitted_ts')

    # Output components filled by update_commit_view(); the order must match
    # the tuple that function returns.
    commit_view = [
        github_link,
        diff_view,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        n_forms_submitted,
        *commit_msgs,
        *model_names,
    ]

    feedback_metadata = [
        session_val,
        repo_val,
        hash_val,
        sample_loaded_timestamp,
        sample_submitted_timestamp
    ]

    # The first model's form serves as the schema of a saved record.
    saver.setup([current_sample_sld] + feedback_metadata + questions[0] + [model_names[0], ], "feedback")

    questions_list = list(chain.from_iterable(questions))

    # "Skip" and "continue" both move to the next sample and refresh the
    # same set of components, in the order next_sample() returns them.
    next_sample_outputs = [current_sample_sld] + commit_view + questions_list + submit_buttons + [continue_btn]
    skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=next_sample_outputs)
    continue_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                       outputs=next_sample_outputs)


    def submit_for_model(current_sample, n_forms_submitted_val, *args):
        """Save one model's form and update the progress widgets.

        Args:
            current_sample: value of the sample slider.
            n_forms_submitted_val: number of forms already submitted for
                the current sample.
            *args: the feedback metadata, the form's answers and the model
                name, in the order registered with ``saver.setup``.

        Returns:
            A disabled "Submitted" button for the form, the incremented
            counter, and the continue button - enabled and captioned
            "Next sample" once all ``n_models`` forms are submitted,
            otherwise disabled and showing the progress.
        """
        saver.flag((current_sample,) + args)
        n_forms_submitted_val += 1
        all_forms_submitted = n_forms_submitted_val == n_models

        if all_forms_submitted:
            continue_caption = "Next sample"
        else:
            continue_caption = f"{n_forms_submitted_val}/{n_models} forms submitted"

        return (gr.Button(value="Submitted", interactive=False),
                n_forms_submitted_val,
                gr.Button(continue_caption, interactive=all_forms_submitted))


    for submit_btn, form_questions, model_name_box in zip(submit_buttons, questions, model_names):
        submit_btn.click(
            submit_for_model,
            inputs=[current_sample_sld, n_forms_submitted] + feedback_metadata + form_questions + [
                model_name_box, ],
            outputs=[submit_btn, n_forms_submitted, continue_btn]
        )


    def init_session(current_sample):
        """Start a session and show its first sample.

        Generates a random session id and a random permutation of all
        sample indices, then builds the commit view for the sample at
        position ``current_sample`` of that permutation.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])


    application.load(init_session,
                     inputs=[current_sample_sld],
                     outputs=[session_val, shuffled_idx_val] + commit_view, )

application.launch()