commit-labeling / app.py
Petr Tsvetkov
Introduce shuffled index for samples
7af62b1
raw
history blame
5.12 kB
import random
import uuid
import gradio as gr
from datasets import load_dataset
# Name of the dataset configuration passed to load_dataset below.
configuration = "commitchronicle-py-long"  # select a configuration

# Test split of the selected configuration; cache_dir points at "data".
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)

# Number of records available for labeling.
n_samples = len(dataset)

# CSV logger used to persist submitted feedback (set up inside the UI block).
saver = gr.CSVLogger()
def get_github_link(repo, hash):
    """Return the GitHub URL of commit ``hash`` in repository ``repo``.

    Args:
        repo: Repository path inserted after ``https://github.com/``.
        hash: Commit identifier inserted after ``/commit/``.

    Returns:
        The URL string ``https://github.com/<repo>/commit/<hash>``.
    """
    return f"https://github.com/{repo}/commit/{hash}"
def update_commit_view(sample_ind):
    """Return the values that populate the commit view for one dataset record.

    Args:
        sample_ind: Index into the module-level ``dataset``.

    Returns:
        A 5-tuple ``(github_link_md, diff_json, commit_msg, repo_val,
        hash_val)`` built from the record's ``repo``, ``hash``, ``mods`` and
        ``message`` fields. When ``sample_ind`` is at or past ``n_samples``
        every element is ``None``.
    """
    if sample_ind >= n_samples:
        # Callers concatenate this result onto another tuple, so a bare None
        # here would raise TypeError; keep the 5-element shape instead.
        return (None,) * 5

    record = dataset[sample_ind]
    repo = record['repo']
    commit_hash = record['hash']
    github_link_md = f"[See the commit on GitHub]({get_github_link(repo, commit_hash)})"
    return github_link_md, record['mods'], record['message'], repo, commit_hash
def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample of the session's shuffled order.

    Args:
        current_sample_ind: Position of the sample currently shown, i.e. an
            index into ``shuffled_idx`` (not into the dataset).
        shuffled_idx: Sequence of dataset indices in presentation order.

    Returns:
        A 6-tuple: the new position followed by the five commit-view values
        from ``update_commit_view``. Once every position has been visited the
        position equals ``len(shuffled_idx)`` and the five view values are
        ``None``.
    """
    total = len(shuffled_idx)
    # Clamp so repeated calls after the end stay at ``total``.
    next_ind = min(current_sample_ind + 1, total)

    if next_ind >= total:
        # Stepping past the last sample used to index shuffled_idx[total] and
        # raise IndexError. Return a well-formed "finished" result instead;
        # the five Nones mirror the five values update_commit_view returns.
        return (next_ind,) + (None,) * 5

    return (next_ind,) + update_commit_view(shuffled_idx[next_ind])
# Labeling UI: shows one commit at a time and collects feedback on its message.
# NOTE(review): the original indentation was not available; the layout nesting
# below is reconstructed from the scale= arguments - confirm against the
# deployed UI.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Hidden fields holding the repo/hash of the commit currently displayed,
    # so they can be logged together with the feedback.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    # Hidden per-session order in which dataset indices are presented.
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        # Read-only progress indicator; its value is the current position.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.JSON()
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="AI-generated commit message",
                                    interactive=False,
                                    )
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(info='How can you describe the length of the commit message above?',
                                          label='verbosity',
                                          show_label=False,
                                          choices=[
                                              ('Too short', 0),
                                              ('Just right', 1),
                                              ('Too verbose', 2)])
            correctness_feedback = gr.Radio(info='Is the commit message factually correct?',
                                            label='is_correct',
                                            show_label=False,
                                            choices=[
                                                ('Yes', True),
                                                ('No', False)])
            format_feedback = gr.Slider(info='Rate the commit message\'s format (1 - very bad, 5 - very good)',
                                        label='format_score',
                                        show_label=False,
                                        minimum=1,
                                        step=1,
                                        interactive=True,
                                        maximum=5)
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')

    # Components refreshed whenever a new sample is shown; the order matches
    # the tuple returned by update_commit_view.
    commit_view = [
        github_link,
        diff_view,
        commit_msg,
        repo_val,
        hash_val
    ]

    # Components whose values are written to the feedback log, in column order.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback
    ]

    saver.setup(feedback_form, "feedback")

    skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=[current_sample_sld] + commit_view)

    def submit(current_sample, shuffled_idx, *args):
        """Log the feedback form values, then move on to the next sample.

        Args:
            current_sample: Current position in the shuffled order.
            shuffled_idx: The session's shuffled list of dataset indices.
            *args: Values of the ``feedback_form`` components, in order.

        Returns:
            Whatever ``next_sample`` returns for the given position.
        """
        # ``args`` is a tuple, so the former ``[current_sample] + args`` raised
        # TypeError on every submit. The logger was set up with exactly the
        # feedback_form components, so pass exactly their values; prepending
        # the position would also shift every value into the wrong column.
        saver.flag(list(args))
        return next_sample(current_sample, shuffled_idx)

    submit_btn.click(submit, inputs=[current_sample_sld, shuffled_idx_val] + feedback_form,
                     outputs=[current_sample_sld] + commit_view)

    def init_session(current_sample):
        """Create a session id and a shuffled sample order, and show a sample.

        Args:
            current_sample: Current position (the progress slider's value).

        Returns:
            A tuple of the session id, the shuffled index list, and the five
            commit-view values for the sample at ``current_sample``.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])

    # Initialise the session on page load.
    demo.load(init_session, inputs=[current_sample_sld], outputs=[session_val, shuffled_idx_val] + commit_view)

demo.launch()