Spaces:
Runtime error
Runtime error
Petr Tsvetkov
Generate charts for the presentation & diploma;some refactoring; add (commented) Student's t-test
7ab7be2
| import gradio as gr | |
| import analysis_util | |
| import generate_annotated_diffs | |
| import dataset_statistics | |
# Manual dataset. update_dataset_view reads the 'end_to_start' and
# 'start_to_end' columns from whichever frame it is given, so the manual
# frame gets both columns, filled with False.
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
df_manual["end_to_start"] = False
df_manual["start_to_end"] = False

# Synthetic dataset; used as returned by the generator module.
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()

# Row counts, used as the upper bound of the sample sliders.
n_diffs_manual = len(df_manual.index)
n_diffs_synthetic = len(df_synthetic.index)
def golden():
    """Return the manual dataset frame (``df_manual``) unchanged."""
    return df_manual
def e2s():
    """Return synthetic rows with ``end_to_start`` True and ``start_to_end`` False."""
    from_e2s = df_synthetic['end_to_start'].eq(True)
    not_from_s2e = df_synthetic['start_to_end'].eq(False)
    return df_synthetic[from_e2s & not_from_s2e]
def s2e():
    """Return synthetic rows with ``end_to_start`` False and ``start_to_end`` True."""
    not_from_e2s = df_synthetic['end_to_start'].eq(False)
    from_s2e = df_synthetic['start_to_end'].eq(True)
    return df_synthetic[not_from_e2s & from_s2e]
def e2s_s2e():
    """Return synthetic rows with both ``end_to_start`` and ``start_to_end`` True."""
    from_e2s = df_synthetic['end_to_start'].eq(True)
    from_s2e = df_synthetic['start_to_end'].eq(True)
    return df_synthetic[from_e2s & from_s2e]
def synthetic():
    """Return synthetic rows with ``end_to_start`` or ``start_to_end`` (or both) True."""
    from_e2s = df_synthetic['end_to_start'].eq(True)
    from_s2e = df_synthetic['start_to_end'].eq(True)
    return df_synthetic[from_e2s | from_s2e]
# Per-group statistics keyed by group name. Each selector is called lazily,
# immediately before its statistics are computed, and the groups are
# processed in the order listed here.
STATISTICS = {
    group_name: dataset_statistics.get_statistics_for_df(select_rows())
    for group_name, select_rows in (
        ("manual", golden),
        ("e2s", e2s),
        ("s2e", s2e),
        ("e2s_s2e", e2s_s2e),
        ("synthetic", synthetic),
        ("all", lambda: df_synthetic),
    )
}

# t-test results computed with the manual group passed as `main_group`.
STATISTICS_T_TEST = dataset_statistics.t_test(STATISTICS, main_group='manual')

# Names of the statistics available for every group.
STAT_NAMES = list(STATISTICS['manual'].keys())
def update_dataset_view(diff_idx, df):
    """Return the values displayed for one sample of a dataset.

    Args:
        diff_idx: 1-based sample number (the slider value). Integral floats
            such as ``2.0`` are accepted.
        df: DataFrame with the columns ``annotated_diff``,
            ``commit_msg_start``, ``commit_msg_end``, ``session``,
            ``end_to_start``, ``start_to_end``, ``repo`` and ``hash``.

    Returns:
        A 7-tuple: annotated diff, start message, end message, session,
        ``str(end_to_start)``, ``str(start_to_end)`` and the GitHub commit URL
        built from ``repo`` and ``hash``.
    """
    # The slider is 1-based while iloc is 0-based. int() keeps a float-valued
    # slider event from raising inside iloc, which only accepts integers.
    # NOTE: diff_idx == 0 still maps to position -1, i.e. the last row.
    row = df.iloc[int(diff_idx) - 1]
    return (
        row['annotated_diff'],
        row['commit_msg_start'],
        row['commit_msg_end'],
        row['session'],
        str(row['end_to_start']),
        str(row['start_to_end']),
        f"https://github.com/{row['repo']}/commit/{row['hash']}",
    )
def update_dataset_view_manual(diff_idx):
    """Return the view values for sample number ``diff_idx`` of the manual dataset."""
    manual_view = update_dataset_view(diff_idx, df_manual)
    return manual_view
def update_dataset_view_synthetic(diff_idx):
    """Return the view values for sample number ``diff_idx`` of the synthetic dataset."""
    synthetic_view = update_dataset_view(diff_idx, df_synthetic)
    return synthetic_view
# JavaScript passed to gr.Blocks(js=...). It defines a function that checks
# the page URL and, unless the `__theme` query parameter is already 'light',
# sets it to 'light' and navigates to the updated URL.
force_light_theme_js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'light') {
url.searchParams.set('__theme', 'light');
window.location.href = url.href;
}
}
"""
if __name__ == '__main__':
    # Statistics groups in the left-to-right column order of the Analysis tab.
    _GROUP_ORDER = ("manual", "e2s", "s2e", "e2s_s2e", "synthetic", "all")

    # (statistic key, description) pairs in display order. The description is
    # shown as "Avg <description>" in the averages view and with its first
    # letter upper-cased in the t-test view.
    _STAT_DESCRIPTIONS = (
        ("deletions_norm", "deletions number (rel to the initial msg length)"),
        ("insertions_norm", "insertions number (rel to the result length)"),
        ("changes_norm", "changes number (rel to the initial msg length)"),
        ("deletions", "deletions number"),
        ("insertions", "insertions number"),
        ("changes", "changes number"),
    )

    with gr.Blocks(theme=gr.themes.Soft(), js=force_light_theme_js_func) as application:
        def dataset_view_tab(n_items):
            """Create the widgets of one dataset tab.

            Args:
                n_items: number of samples; used as the slider maximum.

            Returns:
                ``(slider, view)`` where ``view`` lists the output components
                in the same order as the tuple returned by
                ``update_dataset_view``.
            """
            slider = gr.Slider(minimum=1, maximum=n_items, step=1, value=1,
                               label=f"Sample number (total: {n_items})")

            diff_view = gr.HighlightedText(combine_adjacent=True,
                                           color_map={'+': "green", '-': "red"})
            start_view = gr.Textbox(interactive=False, label="Start message", container=True)
            end_view = gr.Textbox(interactive=False, label="End message", container=True)
            session_view = gr.Textbox(interactive=False, label="Session", container=True)
            is_end_to_start_view = gr.Textbox(
                interactive=False,
                label="Is generated on the 'end-to-start' synthesis step?",
                container=True)
            is_start_to_end_view = gr.Textbox(
                interactive=False,
                label="Is generated on the 'start-to-end' synthesis step?",
                container=True)
            link_view = gr.Markdown()

            view = [
                diff_view,
                start_view,
                end_view,
                session_view,
                is_end_to_start_view,
                is_start_to_end_view,
                link_view,
            ]
            return slider, view

        with gr.Tab("Manual"):
            slider_manual, view_manual = dataset_view_tab(n_diffs_manual)
            slider_manual.change(update_dataset_view_manual, inputs=slider_manual,
                                 outputs=view_manual)

        with gr.Tab("Synthetic"):
            slider_synthetic, view_synthetic = dataset_view_tab(n_diffs_synthetic)
            slider_synthetic.change(update_dataset_view_synthetic, inputs=slider_synthetic,
                                    outputs=view_synthetic)

        with gr.Tab("Analysis"):
            def layout_for_statistics(statistics_group_name):
                """Render the sample count and the mean of every statistic for one group."""
                gr.Markdown(f"### {statistics_group_name}")
                stats = STATISTICS[statistics_group_name]
                gr.Number(label="Count", interactive=False,
                          value=len(stats['deletions_norm']), min_width=0)
                for stat_key, description in _STAT_DESCRIPTIONS:
                    gr.Number(label=f"Avg {description}", interactive=False,
                              value=stats[stat_key].mean().item(), precision=3, min_width=0)

            def layout_for_statistics_t_test(statistics_group_name):
                """Render the STATISTICS_T_TEST value of every statistic for one group."""
                gr.Markdown(f"### {statistics_group_name}")
                stats = STATISTICS_T_TEST[statistics_group_name]
                for stat_key, description in _STAT_DESCRIPTIONS:
                    gr.Number(label=description[0].upper() + description[1:], interactive=False,
                              value=stats[stat_key], precision=3, min_width=0)

            def _plot_statistics(stat_names):
                """Render one Plotly chart per statistic from the manual, e2s, s2e and e2s_s2e groups."""
                for stat_name in stat_names:
                    chart = dataset_statistics.build_plotly_chart(
                        stat_golden=STATISTICS['manual'][stat_name],
                        stat_e2s=STATISTICS['e2s'][stat_name],
                        stat_s2e=STATISTICS['s2e'][stat_name],
                        stat_e2s_s2e=STATISTICS['e2s_s2e'][stat_name],
                        stat_name=stat_name,
                    )
                    gr.Plot(value=chart)

            with gr.Row():
                for group_name in _GROUP_ORDER:
                    with gr.Column(scale=1, min_width=100):
                        layout_for_statistics(group_name)

            # Student's t-test view, intentionally left disabled:
            # gr.Markdown("### Student t-test (p-value)")
            # with gr.Row():
            #     for group_name in _GROUP_ORDER:
            #         with gr.Column(scale=1, min_width=100):
            #             layout_for_statistics_t_test(group_name)

            with gr.Row():
                # Left column: absolute statistics; right column: normalised ones.
                with gr.Column(scale=1):
                    _plot_statistics([name for name in STAT_NAMES if "_norm" not in name])
                with gr.Column(scale=1):
                    _plot_statistics([name for name in STAT_NAMES if "_norm" in name])

            gr.Markdown("### Reference-only correlations")
            gr.Markdown(value=analysis_util.get_correlations_for_groups(
                df_synthetic, right_side="ind").to_markdown())

            gr.Markdown("### Aggregated correlations")
            gr.Markdown(value=analysis_util.get_correlations_for_groups(
                df_synthetic, right_side="aggr").to_markdown())

        # Fill both dataset views once on page load, so they are populated
        # before the first slider change.
        application.load(update_dataset_view_manual, inputs=slider_manual,
                         outputs=view_manual)
        application.load(update_dataset_view_synthetic, inputs=slider_synthetic,
                         outputs=view_synthetic)

    application.launch()