Spaces:

yuchenlin
/

Rebiber

Running

Rebiber / app.py

(Bill) Yuchen Lin

hide the download button to avoid confusion before submit

722ef03 12 months ago

No virus

7.2 kB

	import gradio as gr
	import rebiber
	import os
	import uuid


	# Load the bibliography database that ships inside the installed rebiber
	# package, plus the abbreviation table stored next to this app.
	#
	# Removing "__init__.py" from the package's module path leaves the package
	# directory prefix (presumably with its trailing separator -- confirm that
	# rebiber.__file__ always ends in "__init__.py").
	_rebiber_module_file = os.path.abspath(rebiber.__file__)
	filepath = _rebiber_module_file.replace("__init__.py", "")
	bib_list_path = os.path.join(filepath, "bib_list.txt")
	bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)

	# The abbreviation TSV is resolved relative to the current working directory.
	abbr_tsv_path = "abbr.tsv"
	abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)


	def process(input_bib, shorten, remove_keys, deduplicate, sort):
	    """Normalize the submitted BibTeX text with rebiber.

	    The text is written to a per-request ``input_<id>.bib`` file, parsed with
	    ``rebiber.load_bib_file`` and normalized into ``output_<id>.bib`` by
	    ``rebiber.normalize_bib``.

	    Args:
	        input_bib: Raw BibTeX text from the input textbox.
	        shorten: When truthy, the module-level ``abbr_dict`` is handed to
	            rebiber; otherwise an empty list is passed.
	        remove_keys: Forwarded to rebiber as ``removed_value_names``.
	        deduplicate: Forwarded to rebiber as ``deduplicate``.
	        sort: Forwarded to rebiber as ``sort``.

	    Returns:
	        A 3-tuple ``(output_text, run_id, download_button_update)``, one value
	        per output component wired to the Submit button. When the input does
	        not contain an ``@`` the tuple is ``("N/A", "", <hide button>)``.
	    """
	    if not input_bib or "@" not in input_bib:
	        # Nothing that looks like a BibTeX entry. Still return one value per
	        # wired output; a lone "N/A" does not match the three outputs of the
	        # Submit handler. No file is produced, so keep the download button
	        # hidden.
	        return "N/A", "", gr.update(visible=False)

	    random_id = uuid.uuid4().hex
	    input_path = f"input_{random_id}.bib"
	    output_path = f"output_{random_id}.bib"

	    with open(input_path, "w") as f:
	        f.write(input_bib.replace("\t", " "))
	    try:
	        all_bib_entries = rebiber.load_bib_file(input_path)
	        print("# Input Bib Entries:", len(all_bib_entries))
	        # abbr_dict is only read here, so no ``global`` declaration is needed.
	        abbr_dict_pass = abbr_dict if shorten else []
	        rebiber.normalize_bib(bib_db, all_bib_entries, output_path,
	                              abbr_dict=abbr_dict_pass,
	                              deduplicate=deduplicate,
	                              sort=sort,
	                              removed_value_names=remove_keys)
	    finally:
	        # The temporary input copy is not needed once rebiber has run (or
	        # failed); remove it so requests do not pile up files on disk.
	        os.remove(input_path)

	    with open(output_path) as f:
	        # NOTE(review): as written this replacement maps a string to itself;
	        # the intended indentation may have been lost -- confirm.
	        output_bib = f.read().replace("\n ", "\n ")
	    # The output file is intentionally kept: download_file() serves it later
	    # using the run id returned here.
	    return output_bib, random_id, gr.update(visible=True)


	# Sample BibTeX (one @article and one @inproceedings entry) used as the
	# initial value of the "Input BIB" textbox.
	example_input = """
	@article{lin2020birds,
	title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
	author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
	journal={arXiv preprint arXiv:2005.00683},
	year={2020}
	}
	@inproceedings{Lin2020CommonGenAC,
	title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
	author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
	booktitle={Findings},
	year={2020}
	}
	"""

	# One example row holding the sample input; in the visible code it is only
	# referenced by the commented-out gr.Interface configurations.
	examples = [[example_input]]


	# iface = gr.Interface(fn=process,
	# inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
	# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
	# examples=examples,
	# allow_flagging="never"
	# )





	with gr.Blocks() as demo:
	    # Page header: title, badge links and a short description of the tool.
	    gr.Markdown(
	        '''# Rebiber: A tool for normalizing bibtex with official info.
	<table>
	<tr>
	<td>
	<a href="https://yuchenlin.xyz/">
	<img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
	</a>
	</td>
	<td>
	<a href="https://github.com/yuchenlin/rebiber">
	<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
	</a>
	</td>
	<td>
	<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
	<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
	</a>
	</td>
	</tr>
	</table>
	<span style="font-size:13pt">

	We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
	We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
	Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.

	</span>
	'''
	    )

	    with gr.Row():
	        # Left column: input text, normalization options and action buttons.
	        with gr.Column(scale=3):
	            input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
	            # A CheckboxGroup value is the list of *selected choices*; an
	            # empty list means nothing is ticked initially.
	            removekeys = gr.CheckboxGroup(["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
	                                          value=[],
	                                          label="Remove Keys", info="Which keys to remove?")
	            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)
	            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
	            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
	            with gr.Row():
	                clr_button = gr.Button("Clear")
	                button = gr.Button("Submit")
	            # Hidden field carrying the run id from process() to download_file().
	            ex_uuid = gr.Text(label="UUID", visible=False)
	        # Right column: normalized output and the file download controls.
	        with gr.Column(scale=3):
	            # interactive belongs on the constructor; it is not a .style() option.
	            output = gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)", interactive=False).style(show_copy_button=True)
	            # Hidden until process() reports that an output file exists.
	            download_btn = gr.Button("Generate Bib File", visible=False)
	            download_content = gr.File(visible=False)

	    def download_file(ex_uuid):
	        """Reveal the file component with the output generated for a run.

	        Args:
	            ex_uuid: Run id taken from the hidden UUID field.

	        Returns:
	            A single component update for ``download_content``: the path
	            ``output_<ex_uuid>.bib`` made visible, or a hide-update when the
	            id is not a 32-character lowercase hex string or the file does
	            not exist.
	        """
	        run_id = ex_uuid or ""
	        # The id comes back from the client, so accept only the shape that
	        # uuid4().hex produces before using it in a file name.
	        looks_like_run_id = len(run_id) == 32 and all(ch in "0123456789abcdef" for ch in run_id)
	        file_path = f"output_{run_id}.bib"
	        if not looks_like_run_id or not os.path.isfile(file_path):
	            return gr.update(visible=False)
	        # One update sets both the value and the visibility, so the component
	        # is listed only once in ``outputs``.
	        return gr.update(value=file_path, visible=True)

	    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
	    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name="process")

	    def clean(text):
	        """Return an empty string, used to clear the input textbox."""
	        return ""

	    clr_button.click(clean, input_bib, input_bib)
	# gr.Interface(fn=process,
	# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
	# examples=examples,
	# allow_flagging="never",
	# scroll_to_output=True,
	# show_progress=True,
	# )


	# Launch the Blocks app only when this file is run as a script, not when it
	# is imported.
	if __name__ == "__main__":
	demo.launch()


	# Unused bare string kept as a reference note: an arXiv-style entry followed
	# by what appears to be its normalized conference counterpart -- it is never
	# assigned or read by the code.
	"""
	@article{lin2020birds,
	title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
	author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
	journal={arXiv preprint arXiv:2005.00683},
	year={2020}
	}

	@inproceedings{lin2020birds,
	address = {Online},
	author = {Lin, Bill Yuchen and
	Lee, Seyeon and
	Khanna, Rahul and
	Ren, Xiang},
	booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
	doi = {10.18653/v1/2020.emnlp-main.557},
	pages = {6862--6868},
	publisher = {Association for Computational Linguistics},
	title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
	url = {https://aclanthology.org/2020.emnlp-main.557},
	year = {2020}
	}
	"""