"""Gradio app for collecting human feedback on commit messages.

Samples from the ``JetBrains-Research/lca-cmg`` dataset are shown one at a
time in a per-session random order. For every sample the annotator either
skips it or answers three questions (verbosity, correctness, format), and the
answers are stored through ``gr.HuggingFaceDatasetSaver``.
"""
import os
import random
import uuid

import gradio as gr
from datasets import load_dataset

HF_TOKEN = os.environ.get('HF_TOKEN')
HF_DATASET = os.environ.get('HF_DATASET')

configuration = "commitchronicle-py-long"  # select a configuration
dataset = load_dataset("JetBrains-Research/lca-cmg", configuration, split="test", cache_dir="data")
n_samples = len(dataset)

saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)

# Values for (diff_view, commit_msg, repo_val, hash_val) when there is no
# sample left to show. Keeping the same 4-tuple shape as a regular commit view
# lets callers always concatenate it with the slider position.
_NO_SAMPLE_VIEW = ("<p>All samples have been processed. Thank you!</p>", "", "", "")


def get_github_api_url(repo, hash):
    """Return the GitHub REST API URL of commit ``hash`` in repository ``repo``.

    ``hash`` shadows the builtin of the same name; the parameter name is kept
    so that existing keyword callers continue to work.
    """
    return f"https://api.github.com/repos/{repo}/commits/{hash}"


# NOTE(review): this snippet is whitespace-only in the file as received --
# confirm that the JS helper that refreshes the diff view was not lost.
DIFF_VIEW_UPDATE_JS_FN = """ """


def get_diff2html_view(github_api_url):
    """Return the HTML snippet placed into the diff view for one commit.

    NOTE(review): the template below contains no markup and does not
    interpolate ``github_api_url`` -- confirm the diff2html markup was not
    stripped from this file.
    """
    html = f"""
"""
    return html


def update_commit_view(sample_ind):
    """Build the commit view for the dataset record at ``sample_ind``.

    Returns:
        A 4-tuple ``(diff_view, commit_msg, repo_val, hash_val)``. When
        ``sample_ind`` is past the end of the dataset a placeholder view of
        the same shape is returned, so callers can always concatenate the
        result with other output values.
    """
    if sample_ind >= n_samples:
        return _NO_SAMPLE_VIEW

    record = dataset[sample_ind]
    github_api_url = get_github_api_url(record['repo'], record['hash'])
    diff_view = get_diff2html_view(github_api_url)
    return diff_view, record['message'], record['repo'], record['hash']


def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample of the session's shuffled order.

    Args:
        current_sample_ind: position (within ``shuffled_idx``) of the sample
            that has just been labeled or skipped.
        shuffled_idx: the session's permutation of dataset indices.

    Returns:
        A 5-tuple: the new slider position followed by the commit view
        ``(diff_view, commit_msg, repo_val, hash_val)``.
    """
    next_ind = current_sample_ind + 1
    # Check the bound *after* incrementing: checking before would let
    # ``shuffled_idx[n_samples]`` raise IndexError on the last sample.
    if next_ind >= n_samples:
        # The slider's maximum is n_samples, meaning "everything processed".
        return (n_samples,) + _NO_SAMPLE_VIEW
    return (next_ind,) + update_commit_view(shuffled_idx[next_ind])


# NOTE(review): whitespace-only in the file as received -- confirm that the
# diff2html <script>/<link> tags were not lost.
DIFF2HTML_IMPORTS = """ """

# NOTE(review): the nesting of the layout below was reconstructed from a
# source whose indentation had been lost -- confirm against the intended UI.
with gr.Blocks(theme=gr.themes.Soft(), head=DIFF2HTML_IMPORTS + DIFF_VIEW_UPDATE_JS_FN) as application:
    # Hidden per-session state: the identity of the commit currently shown
    # and the session's shuffled order of dataset indices.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        # Read-only progress indicator; it also carries the current position
        # within the shuffled order into the event handlers.
        current_sample_sld = gr.Slider(minimum=0,
                                       maximum=n_samples,
                                       step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    with gr.Row():
        with gr.Column(scale=2):
            diff_view = gr.HTML()
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="Commit message", interactive=False)
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(info='How can you describe the length of the commit message above?',
                                          label='verbosity',
                                          show_label=False,
                                          choices=[
                                              ('Too short', 0),
                                              ('Just right', 1),
                                              ('Too verbose', 2),
                                          ])
            correctness_feedback = gr.Radio(info='Is the commit message factually correct?',
                                            label='is_correct',
                                            show_label=False,
                                            choices=[
                                                ('Yes', True),
                                                ('No', False),
                                            ])
            format_feedback = gr.Slider(info="Rate the commit message's format (1 - very bad, 5 - very good)",
                                        label='format_score',
                                        show_label=False,
                                        minimum=1,
                                        step=1,
                                        interactive=True,
                                        maximum=5)
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(info='Session',
                                     interactive=False,
                                     container=True,
                                     show_label=False,
                                     label='session')

    # Components refreshed whenever another commit is displayed; the order
    # matches the tuple returned by update_commit_view().
    commit_view = [
        diff_view,
        commit_msg,
        repo_val,
        hash_val,
    ]

    # Components whose values are stored for every submitted sample.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback,
    ]

    saver.setup([current_sample_sld] + feedback_form, "feedback")

    skip_btn.click(next_sample,
                   inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=[current_sample_sld] + commit_view)

    def submit(current_sample, shuffled_idx, *args):
        """Store the feedback for the current sample and show the next one.

        ``args`` holds the values of ``feedback_form`` in order. Nothing is
        stored once every sample has been processed, so repeated clicks at
        the end do not create bogus rows.
        """
        if current_sample < n_samples:
            saver.flag([current_sample, *args])
        return next_sample(current_sample, shuffled_idx)

    submit_btn.click(submit,
                     inputs=[current_sample_sld, shuffled_idx_val] + feedback_form,
                     outputs=[current_sample_sld] + commit_view)

    def init_session(current_sample):
        """Create a session id and a shuffled sample order, then show the first commit.

        Returns:
            ``(session, shuffled_idx)`` followed by the commit view of the
            sample at position ``current_sample`` of the shuffled order.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])

    application.load(init_session,
                     inputs=[current_sample_sld],
                     outputs=[session_val, shuffled_idx_val] + commit_view)

application.launch()