# Source: commit-message-editing-visualization / change_visualizer.py
# Last commit: "Add datasets comparison" (f26a894) by Petr Tsvetkov
# File size at that commit: 4.71 kB
import gradio as gr
import generate_annotated_diffs
import statistics
# Both datasets are loaded once, at import time, and shared by the
# callbacks and the UI code below.
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
n_diffs_manual = len(df_manual)

df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
n_diffs_synthetic = len(df_synthetic)

# Per-dataset statistics, keyed by the group name used in the "Compare" tab.
# NOTE: `statistics` must resolve to the project-local module that provides
# the get_statistics_for_* helpers; it shadows the stdlib module of that name.
STATISTICS = {
    "manual": statistics.get_statistics_for_manual_df(df_manual),
    "synthetic": statistics.get_statistics_for_synthetic_df(df_synthetic),
}
def update_manual_view(diff_idx):
    """Return the widget values for one sample of the manual dataset.

    ``diff_idx`` is the 1-based sample number; it is shifted down by one
    to obtain the positional index into ``df_manual``.

    Returns a 5-tuple: the annotated diff, the start commit message, the
    end commit message, the session, and a GitHub commit URL built from
    the row's ``repo`` and ``hash`` fields.
    """
    # Fetch the row once and read every field from it.
    row = df_manual.iloc[diff_idx - 1]
    commit_url = f"https://github.com/{row['repo']}/commit/{row['hash']}"
    return (
        row['annotated_diff'],
        row['commit_msg_start'],
        row['commit_msg_end'],
        row['session'],
        commit_url,
    )
def update_synthetic_view(diff_idx):
    """Return the widget values for one sample of the synthetic dataset.

    ``diff_idx`` is the 1-based sample number; it is shifted down by one
    to obtain the positional index into ``df_synthetic``.

    Returns a 4-tuple: the annotated diff, the ``initial_msg_pred`` field,
    the ``reference`` field, and a GitHub commit URL built from the row's
    ``repo`` and ``hash`` fields.
    """
    # Fetch the row once and read every field from it.
    row = df_synthetic.iloc[diff_idx - 1]
    commit_url = f"https://github.com/{row['repo']}/commit/{row['hash']}"
    return (
        row['annotated_diff'],
        row['initial_msg_pred'],
        row['reference'],
        commit_url,
    )
# JavaScript snippet passed to gr.Blocks via its `js` argument.
# If the page URL's `__theme` query parameter is not 'light', it sets the
# parameter to 'light' and navigates to the updated URL.
force_light_theme_js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'light') {
url.searchParams.set('__theme', 'light');
window.location.href = url.href;
}
}
"""
if __name__ == '__main__':
    # Build the three-tab UI ("Manual", "Synthetic", "Compare") and launch it.
    with gr.Blocks(theme=gr.themes.Soft(), js=force_light_theme_js_func) as application:
        with gr.Tab("Manual"):
            # The slider is 1-based; update_manual_view converts it to a
            # 0-based row position.
            slider_manual = gr.Slider(minimum=1, maximum=n_diffs_manual, step=1, value=1,
                                      label=f"Sample number (total: {n_diffs_manual})")

            # gr.HighlightedText is the documented component name; the
            # lowercase `Highlightedtext` spelling is only a legacy alias.
            diff_view_manual = gr.HighlightedText(combine_adjacent=True, color_map={'+': "green", '-': "red"})
            start_view_manual = gr.Textbox(interactive=False, label="Start message", container=True)
            end_view_manual = gr.Textbox(interactive=False, label="End message", container=True)
            session_view_manual = gr.Textbox(interactive=False, label="Session", container=True)
            link_view_manual = gr.Markdown()

            # Order must match the tuple returned by update_manual_view.
            view_manual = [
                diff_view_manual,
                start_view_manual,
                end_view_manual,
                session_view_manual,
                link_view_manual,
            ]

            slider_manual.change(update_manual_view, inputs=slider_manual,
                                 outputs=view_manual)

        with gr.Tab("Synthetic"):
            slider_synthetic = gr.Slider(minimum=1, maximum=n_diffs_synthetic, step=1, value=1,
                                         label=f"Sample number (total: {n_diffs_synthetic})")

            diff_view_synthetic = gr.HighlightedText(combine_adjacent=True, color_map={'+': "green", '-': "red"})
            start_view_synthetic = gr.Textbox(interactive=False, label="Start message", container=True)
            end_view_synthetic = gr.Textbox(interactive=False, label="End message", container=True)
            link_view_synthetic = gr.Markdown()

            # Order must match the tuple returned by update_synthetic_view.
            view_synthetic = [
                diff_view_synthetic,
                start_view_synthetic,
                end_view_synthetic,
                link_view_synthetic,
            ]

            slider_synthetic.change(update_synthetic_view, inputs=slider_synthetic,
                                    outputs=view_synthetic)

        with gr.Tab("Compare"):
            def layout_for_statistics(statistics_group_name):
                """Render the averaged statistics of one dataset group.

                ``statistics_group_name`` is a key of ``STATISTICS``
                ("manual" or "synthetic"). Emits a heading followed by three
                read-only number fields holding the mean of the 'deletions',
                'insertions' and 'changes' entries of that group.
                """
                gr.Markdown(f"### {statistics_group_name}")
                stats = STATISTICS[statistics_group_name]
                # .mean().item() turns the averaged value into a plain Python
                # number (presumably these are pandas/numpy columns - confirm).
                gr.Number(label="Average deletions number (rel to the result length)", interactive=False,
                          value=stats['deletions'].mean().item(), precision=3)
                gr.Number(label="Average insertions number (rel to the result length)", interactive=False,
                          value=stats['insertions'].mean().item(), precision=3)
                gr.Number(label="Average changes number (rel to the result length)", interactive=False,
                          value=stats['changes'].mean().item(), precision=3)

            # Two equally wide columns, one per dataset.
            with gr.Row():
                with gr.Column(scale=1):
                    layout_for_statistics("manual")
                with gr.Column(scale=1):
                    layout_for_statistics("synthetic")

        # Fill both views when the page loads, using each slider's current
        # value, so they are not empty before the first slider change.
        application.load(update_manual_view, inputs=slider_manual,
                         outputs=view_manual)
        application.load(update_synthetic_view, inputs=slider_synthetic,
                         outputs=view_synthetic)

    application.launch()