Spaces:
Sleeping
Sleeping
Petr Tsvetkov
Create update function for diff view using diff2htmlUI js library instead of iframe
5602c70
import os | |
import random | |
import uuid | |
import gradio as gr | |
from datasets import load_dataset | |
# Access token and name of the dataset that receives the collected feedback.
# Both come from the environment and are None when the variable is unset.
HF_TOKEN = os.getenv('HF_TOKEN')
HF_DATASET = os.getenv('HF_DATASET')

# Dataset configuration to label; change this to select another one.
configuration = "commitchronicle-py-long"

# Test split of the selected configuration, cached under ./data.
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)
n_samples = len(dataset)

# Flagging callback used to persist submitted feedback (created as private).
saver = gr.HuggingFaceDatasetSaver(HF_TOKEN, HF_DATASET, private=True)
def get_github_api_url(repo, hash):
    """Return the GitHub REST API URL of a single commit.

    Args:
        repo: Repository path that is placed after ``/repos/`` in the URL.
        hash: Commit identifier that is placed after ``/commits/``.

    Returns:
        The URL as a string.
    """
    return f"https://api.github.com/repos/{repo}/commits/{hash}"
# JavaScript placed in the page <head>. It defines updateDiffView(), which reads
# the GitHub API URL stored in the "github-api-url" attribute of the #diff-view
# element, requests that commit in diff format and renders the response into
# the same element with Diff2HtmlUI.
#
# The rendering code sits inside the braces of the readyState/status check so
# that it only runs once the request has finished successfully; without the
# braces it would run on every state change with an undefined diff.
#
# NOTE(review): no call to updateDiffView() is visible in this file - confirm
# where it is supposed to be triggered.
DIFF_VIEW_UPDATE_JS_FN = """
<script>
function updateDiffView() {
    var github_api_url = document.getElementById('diff-view').getAttribute("github-api-url");
    var xmlHttp = new XMLHttpRequest();
    xmlHttp.onreadystatechange = function() {
        if (xmlHttp.readyState == 4 && xmlHttp.status == 200) {
            var diff = xmlHttp.responseText;
            console.log(diff);
            var targetElement = document.getElementById('diff-view');
            var configuration = {
                drawFileList: true,
                matching: 'lines',
                highlight: true
            };
            var diff2htmlUi = new Diff2HtmlUI(targetElement, diff, configuration);
            diff2htmlUi.draw();
        }
    };
    xmlHttp.open("GET", github_api_url, true);
    xmlHttp.setRequestHeader("Accept", "application/vnd.github.v3.diff");
    xmlHttp.send();
}
</script>
"""
def get_diff2html_view(github_api_url):
    """Return the HTML placeholder that the diff is rendered into.

    The markup is a scrollable container holding an empty ``#diff-view``
    element. The commit URL is stored in its ``github-api-url`` attribute,
    which is where the page script looks it up.

    Args:
        github_api_url: URL of the commit in the GitHub API.

    Returns:
        The HTML fragment as a string.
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    # The value lands inside a double-quoted attribute, so quotes and angle
    # brackets must be escaped. Ordinary URLs pass through unchanged.
    safe_url = escape(str(github_api_url), quote=True)
    return f"""
    <div style='width:100%; height:720px; overflow:auto'>
        <div
            id='diff-view'
            github-api-url="{safe_url}"
        ></div>
    </div>
    """
def update_commit_view(sample_ind):
    """Build the commit-view values for one dataset record.

    Args:
        sample_ind: Index of the record in ``dataset``.

    Returns:
        A 4-tuple ``(diff_view_html, commit_message, repo, hash)``. When
        ``sample_ind`` is not below ``n_samples`` all four elements are
        ``None``; the arity stays the same because callers concatenate the
        result with other tuples.
    """
    if sample_ind >= n_samples:
        # Keep the tuple shape so `(x,) + update_commit_view(...)` still works.
        return None, None, None, None

    record = dataset[sample_ind]
    repo = record['repo']
    commit_hash = record['hash']
    diff_view = get_diff2html_view(get_github_api_url(repo, commit_hash))
    return diff_view, record['message'], repo, commit_hash
def next_sample(current_sample_ind, shuffled_idx):
    """Advance to the next sample and return the refreshed UI values.

    Args:
        current_sample_ind: Position, within ``shuffled_idx``, of the sample
            that is currently shown.
        shuffled_idx: Dataset indices in the order they are presented.

    Returns:
        A 5-tuple: the new position followed by the four values produced by
        ``update_commit_view``. When no sample is left, the position is
        ``n_samples`` and the four view values are ``None``.
    """
    next_ind = current_sample_ind + 1
    if next_ind >= n_samples:
        # The bound is checked after incrementing: position n_samples - 1 is
        # the last valid index into shuffled_idx, so stepping past it must not
        # index the list. The tuple keeps the same arity as the regular return.
        return (n_samples, None, None, None, None)
    return (next_ind,) + update_commit_view(shuffled_idx[next_ind])
# Markup for the page <head>: the highlight.js GitHub theme, the diff2html
# stylesheet and the diff2html UI script bundle, all loaded from public CDNs.
DIFF2HTML_IMPORTS = """
<!-- Stylesheet -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.8.0/styles/github.min.css" />
<link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/diff2html/bundles/css/diff2html.min.css" />
<!-- Javascripts -->
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/diff2html/bundles/js/diff2html-ui.min.js"></script>
"""
# Labeling UI. The diff2html assets and the updateDiffView() script are passed
# to the page through the `head` argument.
# NOTE(review): the Row/Column nesting below is reconstructed from flattened
# source - confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), head=DIFF2HTML_IMPORTS + DIFF_VIEW_UPDATE_JS_FN) as application:
    # Invisible components carrying the identity of the shown commit and the
    # per-session presentation order.
    repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
    hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
    shuffled_idx_val = gr.JSON(visible=False)

    # Top row: read-only progress slider and the skip button.
    with gr.Row():
        current_sample_sld = gr.Slider(
            minimum=0,
            maximum=n_samples,
            step=1,
            value=0,
            interactive=False,
            label='sample_ind',
            info=f"Samples labeled/skipped (out of {n_samples})",
            show_label=False,
            container=False,
            scale=5,
        )
        with gr.Column(scale=1):
            skip_btn = gr.Button("Skip the current sample")

    # Main row: diff on the left, commit message and questionnaire on the right.
    with gr.Row():
        with gr.Column(scale=2):
            diff_view = gr.HTML()
        with gr.Column(scale=1):
            commit_msg = gr.Textbox(
                label="Commit message",
                interactive=False,
            )
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(
                info='How can you describe the length of the commit message above?',
                label='verbosity',
                show_label=False,
                choices=[
                    ('Too short', 0),
                    ('Just right', 1),
                    ('Too verbose', 2),
                ],
            )
            correctness_feedback = gr.Radio(
                info='Is the commit message factually correct?',
                label='is_correct',
                show_label=False,
                choices=[
                    ('Yes', True),
                    ('No', False),
                ],
            )
            format_feedback = gr.Slider(
                info="Rate the commit message's format (1 - very bad, 5 - very good)",
                label='format_score',
                show_label=False,
                minimum=1,
                step=1,
                interactive=True,
                maximum=5,
            )
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(
                info='Session',
                interactive=False,
                container=True,
                show_label=False,
                label='session',
            )

    # Components refreshed whenever another sample is shown.
    commit_view = [
        diff_view,
        commit_msg,
        repo_val,
        hash_val,
    ]

    # Components whose values are stored with every submission.
    feedback_form = [
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback,
    ]

    saver.setup([current_sample_sld, *feedback_form], "feedback")

    skip_btn.click(
        next_sample,
        inputs=[current_sample_sld, shuffled_idx_val],
        outputs=[current_sample_sld, *commit_view],
    )

    def submit(current_sample, shuffled_idx, *feedback):
        """Store the submitted feedback, then move on to the next sample."""
        saver.flag((current_sample, *feedback))
        return next_sample(current_sample, shuffled_idx)

    submit_btn.click(
        submit,
        inputs=[current_sample_sld, shuffled_idx_val, *feedback_form],
        outputs=[current_sample_sld, *commit_view],
    )

    def init_session(current_sample):
        """Create a session id and a random sample order, and show a sample."""
        session_id = str(uuid.uuid4())
        order = list(range(n_samples))
        random.shuffle(order)
        first_view = update_commit_view(order[current_sample])
        return (session_id, order, *first_view)

    application.load(
        init_session,
        inputs=[current_sample_sld],
        outputs=[session_val, shuffled_idx_val, *commit_view],
    )

application.launch()