File size: 5,115 Bytes
7af62b1
1b24b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7af62b1
1b24b13
 
 
 
7af62b1
1b24b13
 
 
 
7af62b1
 
 
 
1b24b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7af62b1
 
1b24b13
 
7af62b1
 
 
1b24b13
 
7af62b1
 
1b24b13
 
7af62b1
1b24b13
7af62b1
 
 
1b24b13
 
7af62b1
1b24b13
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import random
import uuid

import gradio as gr
from datasets import load_dataset

# Name of the dataset configuration (subset) to label; change it here to switch.
configuration = "commitchronicle-py-long"

# Load the "test" split once at import time, using "data" as the cache directory.
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)

# Total number of records; used as the upper bound for sample navigation.
n_samples = len(dataset)

# CSV-backed logger that the UI below uses to persist submitted feedback.
saver = gr.CSVLogger()


def get_github_link(repo, hash):
    """Return the GitHub URL of a commit.

    Args:
        repo: Repository path as it appears after ``github.com/``
            (interpolated verbatim into the URL).
        hash: Commit hash (interpolated verbatim into the URL).

    Returns:
        The string ``https://github.com/<repo>/commit/<hash>``.
    """
    return f"https://github.com/{repo}/commit/{hash}"


def update_commit_view(sample_ind):
    """Build the values displayed in the commit view for one dataset record.

    Args:
        sample_ind: Index of the record in the module-level ``dataset``.

    Returns:
        A 5-tuple ``(github_link_md, diff_json, commit_msg, repo_val,
        hash_val)`` read from the record's ``repo``, ``hash``, ``mods`` and
        ``message`` fields. If ``sample_ind`` is at or past ``n_samples``,
        every element is ``None``.
    """
    if sample_ind >= n_samples:
        # Keep the return shape stable: callers concatenate this result onto
        # another tuple, which fails with a bare ``None``.
        return (None,) * 5

    record = dataset[sample_ind]
    repo_val = record['repo']
    hash_val = record['hash']
    github_link_md = f"[See the commit on GitHub]({get_github_link(repo_val, hash_val)})"
    diff_json = record['mods']
    commit_msg = record['message']
    return github_link_md, diff_json, commit_msg, repo_val, hash_val


def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample in the session's shuffled order.

    Args:
        current_sample_ind: Position of the sample currently shown (the
            progress slider's value).
        shuffled_idx: Sequence mapping a position to a dataset index.

    Returns:
        A 6-tuple: the new position followed by the five commit-view values
        produced by ``update_commit_view``. Once every sample has been
        visited, the position is pinned at ``n_samples`` and the five view
        values are ``None``.
    """
    # The value comes from a slider component; coerce it so it is always
    # usable as a list index (assumes the slider may hand back a float).
    next_ind = int(current_sample_ind) + 1

    # Check bounds AFTER incrementing: the last valid position is
    # n_samples - 1, so indexing shuffled_idx[n_samples] would raise.
    if next_ind >= n_samples:
        # One None per commit-view output, so the handler still returns a
        # value for every declared output.
        return (n_samples,) + (None,) * 5

    return (next_ind,) + update_commit_view(shuffled_idx[next_ind])


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Hidden fields carrying the current record's repo/hash so they can be
    # logged together with the feedback.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    # Hidden holder for the shuffled sample order produced by init_session.
    shuffled_idx_val = gr.JSON(visible=False)

    with gr.Row():
        # Read-only progress indicator; its value is the current position
        # in the shuffled order.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")
    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.JSON()
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="AI-generated commit message",
                                    interactive=False,
                                    )
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(info='How can you describe the length of the commit message above?',
                                          label='verbosity',
                                          show_label=False,
                                          choices=[
                                              ('Too short', 0),
                                              ('Just right', 1),
                                              ('Too verbose', 2)])
            correctness_feedback = gr.Radio(info='Is the commit message factually correct?',
                                            label='is_correct',
                                            show_label=False,
                                            choices=[
                                                ('Yes', True),
                                                ('No', False)])
            format_feedback = gr.Slider(info='Rate the commit message\'s format (1 - very bad, 5 - very good)',
                                        label='format_score',
                                        show_label=False,
                                        minimum=1,
                                        step=1,
                                        interactive=True,
                                        maximum=5)
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')

    # Components refreshed whenever a new sample is shown; the order matches
    # the tuple returned by update_commit_view.
    commit_view = [
        github_link,
        diff_view,
        commit_msg,
        repo_val,
        hash_val
    ]

    # Components whose values are passed to submit() as *args and logged.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback
    ]

    # Register the progress slider together with the feedback form: submit()
    # logs the sample position first, so the logger needs a matching
    # component for it to keep values and columns aligned.
    saver.setup([current_sample_sld] + feedback_form, "feedback")

    skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
                   outputs=[current_sample_sld] + commit_view)


    def submit(current_sample, shuffled_idx, *args):
        """Log the submitted feedback, then move on to the next sample.

        Args:
            current_sample: Current value of the progress slider.
            shuffled_idx: Shuffled sample order for this session.
            *args: Values of the ``feedback_form`` components, in order.

        Returns:
            Whatever ``next_sample`` returns for the given position.
        """
        # *args is a tuple, so ``[current_sample] + args`` would raise
        # TypeError; unpack it into a single list instead. The resulting row
        # has one value per component registered in saver.setup above.
        saver.flag([current_sample, *args])
        return next_sample(current_sample, shuffled_idx)


    submit_btn.click(submit, inputs=[current_sample_sld, shuffled_idx_val] + feedback_form,
                     outputs=[current_sample_sld] + commit_view)


    def init_session(current_sample):
        """Start a labeling session when the page loads.

        Generates a random UUID4 session id and a random permutation of all
        dataset indices.

        Args:
            current_sample: Current value of the progress slider.

        Returns:
            ``(session, shuffled_idx)`` followed by the commit-view values
            for the sample at position ``current_sample`` of the permutation.
        """
        session = str(uuid.uuid4())
        shuffled_idx = list(range(n_samples))
        random.shuffle(shuffled_idx)
        return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])


    demo.load(init_session, inputs=[current_sample_sld], outputs=[session_val, shuffled_idx_val] + commit_view)

demo.launch()