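# Spaces: Runtime error
# (Status text captured from the hosting page together with this file; it is
# not part of the program and is kept only as a comment.)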
import html
import os
import pickle
import time

import gradio as gr
import nltk
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel

# Fetch the NLTK resources before the project modules are imported, in case
# those modules need them at import time (original statement order is kept).
nltk.download('punkt')  # tokenizer
nltk.download('averaged_perceptron_tagger')  # postagger

from input_format import *
from score import *
# Pick the compute device once; both models below are moved onto it.
# `torch` is imported explicitly at the top of the file rather than relying on
# it leaking in through one of the project star imports.
# torch.cuda.is_available = lambda: False  # uncomment to test with CPU only
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Document-level scoring model and its tokenizer.
pretrained_model = 'allenai/specter'
tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
doc_model = AutoModel.from_pretrained(pretrained_model).to(device)

# Sentence-level model used for the highlight computation.
sent_model = SentenceTransformer('sentence-transformers/gtr-t5-base').to(device)
# Tooltip markup appended to the first affinity score and the first sentence
# relevance shown in the overview (only the first occurrence carries one).
_AFFINITY_HELP_TIP = (
    '\n<div class="help-tip">\n'
    "<p>Measures how similar the paper's abstract is to the submission abstract.</p>\n"
    '</div>\n'
)
_RELEVANCE_HELP_TIP = (
    '\n<div class="help-tip">\n'
    '<p>Measures how similar the sentence pairs are.</p>\n'
    '</div>'
)


def _paper_header_updates(title, url, score, with_help_tip):
    """Return the [title link, affinity score] Markdown updates for one paper."""
    # Titles and URLs come from an external service, so escape them before
    # placing them into HTML.
    link = '<a href="%s" target="_blank"><h4>%s</h4></a>' % (
        html.escape(str(url), quote=True),
        html.escape(str(title), quote=False),
    )
    affinity = '#### Affinity Score: %0.3f' % score
    if with_help_tip:
        affinity += _AFFINITY_HELP_TIP
    return [
        gr.update(value=link, visible=True),
        gr.update(value=affinity, visible=True),
    ]


def _sentence_pair_updates(pair, with_help_tip):
    """Return the five output values for one sentence-pair row.

    Order: relevance score, submission sentence, its highlight, reviewer
    sentence, its highlight. ``pair`` is ``None`` when there is nothing to show.
    """
    if pair is None:
        # Blank placeholders keep the output count fixed and clear anything
        # left from an earlier search.
        # NOTE(review): assumes the Interpretation component accepts an empty
        # "interpretation" list -- confirm against the Gradio version in use.
        return [
            gr.update(value='', visible=False),
            '',
            {'original': '', 'interpretation': []},
            '',
            {'original': '', 'interpretation': []},
        ]
    relevance = 'Sentence Relevance:\n%0.3f' % pair['score']
    if with_help_tip:
        relevance += _RELEVANCE_HELP_TIP
    return [
        gr.update(value=relevance, visible=True),
        pair['query']['original'],
        pair['query'],
        pair['candidate']['original'],
        pair['candidate'],
    ]


def get_similar_paper(
    abstract_text_input,
    author_id_input,
    results=None,  # previous State value; always replaced by this search
):
    """Score a reviewer's papers against a submission and build the UI updates.

    Args:
        abstract_text_input: Abstract of the submission.
        author_id_input: Reviewer profile link, passed to
            ``get_text_from_author_id`` and used as the link target in the
            overview heading.
        results: Incoming State value. It is not read; a fresh dict is built
            and returned as the last output.

    Returns:
        A tuple of output values in this order: paper radio update, submission
        sentence radio update, the overview values for three papers (title,
        affinity, then five values for each of two sentence pairs), the
        "show more" button update, the overview description, two demarcation
        updates, the search status, the exploration description, and the
        results dict. Slots for papers or sentence pairs that do not exist are
        filled with blank placeholders instead of raising ``IndexError``.
    """
    # NOTE(review): Gradio documents progress tracking through a
    # `progress=gr.Progress()` default parameter; this locally created
    # instance is kept from the original -- confirm it is actually reported.
    progress = gr.Progress()
    num_papers_show = 10  # number of top papers to show from the reviewer
    top_papers_show = 3  # number of top papers to show upfront
    top_num_info_show = 2  # number of sentence pairs per paper to show upfront

    print('retrieving similar papers...')
    start = time.time()
    input_sentences = sent_tokenize(abstract_text_input)

    # Get author papers from id
    name, papers = get_text_from_author_id(author_id_input)

    # Compute doc-level affinity scores for the papers
    # TODO detect duplicate papers?
    titles, abstracts, paper_urls, doc_scores = compute_document_score(
        doc_model,
        tokenizer,
        abstract_text_input,
        papers,
        batch=10,
    )
    results = {
        'name': name,
        'titles': titles,
        'abstracts': abstracts,
        'urls': paper_urls,
        'doc_scores': doc_scores,
    }

    # Select top K choices of papers to show
    titles = titles[:num_papers_show]
    abstracts = abstracts[:num_papers_show]
    doc_scores = doc_scores[:num_papers_show]
    paper_urls = paper_urls[:num_papers_show]
    display_title = ['[ %0.3f ] %s' % (s, t) for t, s in zip(titles, doc_scores)]

    retrieval_time = time.time() - start
    print('paper retrieval complete in [%0.2f] seconds' % retrieval_time)
    progress(0.9, desc="Obtaining relevant information from the papers...")

    print('obtaining highlights..')
    start = time.time()
    for label, tt, ab, ds, url in zip(
            display_title, titles, abstracts, doc_scores, paper_urls):
        # Compute sent-level and phrase-level affinity scores for each paper
        _, _, info, top_pairs_info = get_highlight_info(
            sent_model,
            abstract_text_input,
            ab,
            K=2,  # top two sentences from the candidate
        )

        # Per-word scores in the format of the Gradio Interpretation
        # component, one entry per submission sentence.
        word_scores = {}
        for i in range(len(input_sentences)):
            ww, ss = remove_spaces(info['all_words'], info[i]['scores'])
            word_scores[str(i)] = {
                "original": ab,
                "interpretation": list(zip(ww, ss)),
            }

        results[label] = {
            'title': tt,
            'abstract': ab,
            'doc_score': '%0.3f' % ds,
            'source_sentences': input_sentences,
            'highlight': word_scores,
            'top_pairs': top_pairs_info,
            'url': url,
        }
    highlight_time = time.time() - start
    print('done in [%0.2f] seconds' % highlight_time)

    ## Set up output elements
    out = [
        gr.update(choices=display_title, interactive=True),  # set of papers (radio)
        gr.update(choices=input_sentences, interactive=True),  # submission sentences
    ]

    # Overview shown upfront: pad with blanks when fewer papers or pairs exist.
    for i in range(top_papers_show):
        if i < len(display_title):
            out += _paper_header_updates(
                titles[i], paper_urls[i], doc_scores[i], with_help_tip=(i == 0))
            top_pairs = results[display_title[i]]['top_pairs']
        else:
            out += [
                gr.update(value='', visible=False),
                gr.update(value='', visible=False),
            ]
            top_pairs = []
        for j in range(top_num_info_show):
            try:
                pair = top_pairs[j]
            except (IndexError, KeyError):
                pair = None
            out += _sentence_pair_updates(
                pair, with_help_tip=(i == 0 and j == 0))

    out.append(gr.update(visible=True))  # make show more button visible
    assert len(out) == (top_num_info_show * 5 + 2) * top_papers_show + 3

    # Result 1 description. The blank lines end the raw-HTML heading block so
    # that the Markdown emphasis below it is rendered.
    result1_desc_text = (
        '\n'
        '<h3>Top three relevant papers by the reviewer '
        '<a href="%s" target="_blank">%s</a></h3>\n'
        '\n'
        'For each paper, two sentence pairs (one from the submission, one from '
        'the paper) with the highest relevance scores are shown.\n'
        '\n'
        '**<span style="color:black;background-color:#65B5E3;">Blue highlights'
        '</span>**: phrases that appear in both sentences.\n'
    ) % (
        html.escape(str(author_id_input), quote=True),
        html.escape(str(results['name']), quote=False),
    )
    out.append(gr.update(value=result1_desc_text, visible=True))

    # Demarcation lines between results
    out += [gr.update(visible=True), gr.update(visible=True)]

    # Progress status
    out.append(gr.update(
        value='Done (in %0.1f seconds)' % (retrieval_time + highlight_time),
        visible=True,
    ))

    # Result 2 description
    desc = (
        '\n'
        '##### Click a paper by %s on the left (sorted by affinity scores), '
        'and a sentence from the submission on the right, '
        'to see which parts of the paper are relevant.\n'
    ) % html.escape(str(results['name']), quote=False)
    out.append(gr.update(value=desc))

    # Search results handed on to the Gradio State variable
    out.append(results)
    return tuple(out)
def show_more(info):
    """Reveal the interactive part of the app.

    ``info`` is accepted but not read. The result is a tuple of six
    ``visible=True`` updates, one each for: description, set of papers,
    submission sentences, title row, affinity row, highlight legend.
    """
    revealed_outputs = 6
    return tuple(gr.update(visible=True) for _ in range(revealed_outputs))
def show_status():
    """Return the update that shows the search status field on search click."""
    status_update = gr.update(visible=True)
    return status_update
def update_name(author_id_input):
    """Return an update carrying the author name looked up from the id input."""
    # Only the first element of the lookup result (the name) is used here.
    reviewer_name, _unused = get_text_from_author_id(author_id_input)
    name_update = gr.update(value=reviewer_name)
    return name_update
def change_output_highlight(selected_papers_radio, source_sent_choice, info=None):
    """Return the highlight for the chosen submission sentence.

    Args:
        selected_papers_radio: Key of the selected paper in ``info``.
        source_sent_choice: The submission sentence that was selected.
        info: Cached search results; each paper entry holds
            'source_sentences' and 'highlight' (keyed by the sentence index
            as a string).

    Returns:
        The highlight entry of the first matching sentence, or ``None`` when
        ``info`` is empty, the selected paper is not in ``info`` (nothing
        selected yet, or a selection left over from an earlier search), or
        the sentence is not found.
    """
    if not info:
        return None
    paper = info.get(selected_papers_radio)
    if paper is None:
        # Previously a KeyError: no paper selected or stale selection.
        return None
    for idx, sentence in enumerate(paper['source_sentences']):
        if sentence == source_sent_choice:
            return paper['highlight'][str(idx)]
    return None
def change_paper(selected_papers_radio, info=None):
    """Return the display values for the selected paper.

    Args:
        selected_papers_radio: Key of the selected paper in ``info``.
        info: Cached search results; each paper entry holds 'title',
            'abstract', 'doc_score', 'highlight' and 'url'.

    Returns:
        ``(title_html, abstract, affinity_markdown, highlight_for_sentence_0)``,
        or ``None`` when ``info`` is empty or the selected paper is not in it
        (nothing selected yet, or a selection left over from an earlier
        search).
    """
    if not info:
        return None
    paper = info.get(selected_papers_radio)
    if paper is None:
        # Previously a KeyError; now handled like the empty-info case.
        return None

    # Title and URL come from an external service: escape before building HTML.
    title_out = '<a href="%s" target="_blank"><h5>%s</h5></a>' % (
        html.escape(str(paper['url']), quote=True),
        html.escape(str(paper['title']), quote=False),
    )
    aff_score_out = '##### Affinity Score: %s' % paper['doc_score']
    # The highlight for the first submission sentence is shown by default.
    return title_out, paper['abstract'], aff_score_out, paper['highlight']['0']
### TEXT Description
# General instruction
general_instruction = """
# R2P2: Reviewer TO Paper in Peer review
#### Who is it for?
It is for meta-reviewers, area chairs, program chairs, or anyone who oversees the submission-reviewer matching process in peer review for academic conferences, journals, and grants.
#### How does it help?
A typical meta-reviewer workflow lacks supportive information on **what makes the pre-selected candidate reviewers a good fit** for the submission. Only affinity scores between the reviewer and the paper are shown, without additional detail.
R2P2 provides more information about each reviewer. It searches for the most relevant papers among the reviewer's previous publications and highlights relevant parts within them.
"""

# TODO add instruction video link
# More details (video, addendum). Attributes are space-separated; the stray
# commas between them were removed.
more_details_instruction = """Check out <a href="" target="_blank">this video</a> for a quick demo of what R2P2 is and how it can help. For more details (e.g., relevant work, privacy policy, disclaimer), refer to <a href="file/details.html" target="_blank">here</a>."""

# Highlight description. The blank lines keep the two legend entries in
# separate paragraphs and stop the trailing rule from turning the text above
# it into a heading.
hl_desc = """
**<span style="color:black;background-color:#DB7262;">Red</span>**: sentences similar to the selected sentence from submission. Darker = more similar.

**<span style="color:black;background-color:#65B5E3;">Blue</span>**: phrases that appear in both sentences.

---
"""


def _build_sentence_pair_row():
    """Create one sentence-pair row inside the active Blocks context.

    Returns the five components in the order the search handler fills them:
    score, submission sentence, its highlight, reviewer sentence, its highlight.
    """
    with gr.Row():
        with gr.Column(scale=1):
            score = gr.Markdown(interactive=False, value='', visible=False)
        with gr.Column(scale=4):
            source = gr.Textbox(label='Sentence from Submission', visible=False)
            source_hl = gr.components.Interpretation(source)
        with gr.Column(scale=4):
            candidate = gr.Textbox(label="Sentence from Reviewer's Paper", visible=False)
            candidate_hl = gr.components.Interpretation(candidate)
    return [score, source, source_hl, candidate, candidate_hl]


def _build_paper_overview(num_pairs=2):
    """Create the overview block for one paper (title, score, sentence pairs).

    Returns the components in output order: title, affinity, then the five
    components of each sentence-pair row.
    """
    with gr.Row():
        with gr.Column(scale=3):
            title = gr.Markdown(value='', visible=False)
        with gr.Column(scale=1):
            affinity_score = gr.Markdown(value='', visible=False)
    components = [title, affinity_score]
    for _ in range(num_pairs):
        components += _build_sentence_pair_row()
    return components


with gr.Blocks(css='style.css') as demo:
    info = gr.State({})  # cached search results as a State variable shared throughout

    # Text description about the app and disclaimer
    gr.Markdown(general_instruction)
    gr.HTML(more_details_instruction)
    gr.Markdown("""---""")

    ### INPUT
    with gr.Row() as input_row:
        with gr.Column():
            abstract_text_input = gr.Textbox(
                label='Submission Abstract',
                info='Paste in the abstract of the submission.',
            )
        with gr.Column():
            with gr.Row():
                author_id_input = gr.Textbox(
                    label='Reviewer Profile Link (Semantic Scholar)',
                    info="Paste in the reviewer's Semantic Scholar link",
                )
            with gr.Row():
                name = gr.Textbox(
                    label='Confirm Reviewer Name',
                    info='This will be automatically updated based on the reviewer profile link above',
                    interactive=False,
                )
            author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
    with gr.Row():
        compute_btn = gr.Button('What Makes This a Good Match?')
    with gr.Row():
        search_status = gr.Textbox(label='Search Status', interactive=False, visible=False)

    ### OVERVIEW
    # Paper title, score, and top-ranking sentence pairs -- two sentence pairs
    # per paper, three papers. These counts must match what the search handler
    # returns.
    with gr.Row():
        result1_desc = gr.Markdown(value='', visible=False)
    # TODO hovering instructions

    ## PAPER 1
    paper1_outputs = _build_paper_overview()
    with gr.Row(visible=False) as demarc1:
        gr.Markdown("""---""")

    ## PAPER 2
    paper2_outputs = _build_paper_overview()
    with gr.Row(visible=False) as demarc2:
        gr.Markdown("""---""")

    ## PAPER 3
    paper3_outputs = _build_paper_overview()

    ## Show more button
    with gr.Row():
        see_more_rel_btn = gr.Button('Explore more', visible=False)

    ### PAPER INFORMATION
    # Description for Explore More Section
    with gr.Row():
        result2_desc = gr.Markdown(value='', visible=False)

    # Hidden textbox backing the abstract highlight below
    paper_abstract = gr.Textbox(label='Abstract', interactive=False, visible=False)
    with gr.Row():
        with gr.Column(scale=1):
            # show multiple papers in radio check box to select from
            selected_papers_radio = gr.Radio(
                choices=[],  # will be updated with the button click
                visible=False,  # also will be updated with the button click
                label='Top Relevant Papers from the Reviewer',
            )
        with gr.Column(scale=2):
            # sentences from submission
            source_sentences = gr.Radio(
                choices=[],
                visible=False,
                label='Sentences from Submission Abstract',
            )
        with gr.Column(scale=3):
            # selected paper and highlight
            with gr.Row():
                highlight_legend = gr.Markdown(value=hl_desc, visible=False)
            with gr.Row(visible=False) as title_row:
                paper_title = gr.Markdown(value='')
            with gr.Row(visible=False) as aff_row:
                affinity = gr.Markdown(value='')
            with gr.Row():
                # highlighted text from paper
                highlight = gr.components.Interpretation(paper_abstract)

    ### EVENT LISTENERS
    # show the status field as soon as the search button is clicked
    compute_btn.click(
        fn=show_status,
        inputs=[],
        outputs=search_status,
    )

    # retrieve similar papers and show top results
    compute_btn.click(
        fn=get_similar_paper,
        inputs=[
            abstract_text_input,
            author_id_input,
            info,
        ],
        outputs=(
            [selected_papers_radio, source_sentences]
            + paper1_outputs
            + paper2_outputs
            + paper3_outputs
            + [
                see_more_rel_btn,
                result1_desc,
                demarc1,
                demarc2,
                search_status,
                result2_desc,
                info,
            ]
        ),
        show_progress=True,
        scroll_to_output=True,
    )

    # Get more info (move to more interactive portion)
    see_more_rel_btn.click(
        fn=show_more,
        inputs=info,
        outputs=[
            result2_desc,
            selected_papers_radio,
            source_sentences,
            title_row,
            aff_row,
            highlight_legend,
        ],
    )

    # change highlight based on selected sentences from submission
    source_sentences.change(
        fn=change_output_highlight,
        inputs=[
            selected_papers_radio,
            source_sentences,
            info,
        ],
        outputs=highlight,
    )

    # change paper to show based on selected papers
    selected_papers_radio.change(
        fn=change_paper,
        inputs=[
            selected_papers_radio,
            info,
        ],
        outputs=[
            paper_title,
            paper_abstract,
            affinity,
            highlight,
        ],
    )

if __name__ == "__main__":
    demo.queue().launch()  # add ?__theme=light to force light mode