import gradio as gr
import rebiber
import os
import uuid
 

# Load Bib Database 
filepath = os.path.abspath(rebiber.__file__).replace("__init__.py","")
bib_list_path = os.path.join(filepath, "bib_list.txt")
abbr_tsv_path = "abbr.tsv"

bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)

abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
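# Rough picture of what these hold: bib_db is (roughly) a lookup from normalized paper
# titles to their official bib entries, built from the DBLP / ACL Anthology data files
# listed in bib_list.txt; abbr_dict holds the venue-abbreviation rules from abbr.tsv
# used by the "Abbreviation" option below.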


def process(input_bib, shorten, remove_keys, deduplicate, sort):
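    """Normalize a pasted BibTeX string with rebiber.

    The text is written to a temporary input file, rebiber.normalize_bib rewrites it
    using the official bib database and the selected options, and the result is read
    back. Returns (normalized bib text, a per-request UUID that names the output file,
    an update that makes the download button visible).
    """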
    if "@" not in input_bib:
        return "N/A"
    global abbr_dict
    # print(f"remove_keys={remove_keys}")
    random_id = uuid.uuid4().hex
    with open(f"input_{random_id}.bib", "w") as f:
        f.write(input_bib.replace("\t", "    "))
    all_bib_entries = rebiber.load_bib_file(f"input_{random_id}.bib")
    print("# Input Bib Entries:", len(all_bib_entries))
    abbr_dict_pass = []
    if shorten:
        abbr_dict_pass = abbr_dict
    rebiber.normalize_bib(bib_db, all_bib_entries, f"output_{random_id}.bib",
                          abbr_dict=abbr_dict_pass,
                          deduplicate=deduplicate,
                          sort=sort,
                          removed_value_names=remove_keys)
    with open(f"output_{random_id}.bib") as f:
        output_bib = f.read().replace("\n ", "\n    ")
    # Clean up the temporary input file; the output file is kept so that
    # download_file() can serve it later via the "Generate Bib File" button.
    os.remove(f"input_{random_id}.bib")
    return output_bib, random_id, gr.update(visible=True)


example_input = """
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
} 
@inproceedings{Lin2020CommonGenAC,
  title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
  author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
  booktitle={Findings},
  year={2020}
}
""" 

examples = [[example_input]]


# iface = gr.Interface(fn=process,
#     inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
#     outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
#     examples=examples,
#     allow_flagging="never"
#     )





with gr.Blocks() as demo:
    
    gr.Markdown(
            '''# Rebiber: A tool for normalizing BibTeX with official info.
                <table> 
                <tr>
                <td>
                <a href="https://yuchenlin.xyz/">
                    <img src="https://img.shields.io/badge/Yuchen%20Lin-🐼-blue?style=social">
                </a>
                </td>
                <td>
                <a href="https://github.com/yuchenlin/rebiber">
                    <img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
                </a>
                </td>
                <td>
                <a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
                    <img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
                </a>
                </td>
                </tr>
                </table>
                <span style="font-size:13pt">
                
                We often cite papers by their arXiv versions without noting that they have already been __PUBLISHED__ at a conference. Such unofficial bib entries can violate the submission or camera-ready policies of some venues. 
                We introduce __Rebiber__, a simple Python tool that fixes them automatically. It is based on the official conference information from [DBLP](https://dblp.org/) and [the ACL Anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
                Apart from replacing outdated arXiv citations, __Rebiber__ also normalizes entries into a unified (DBLP-style) format, with options to abbreviate venue names and remove unwanted fields.
                
                </span>
            '''
    )
    
    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
            removekeys = gr.CheckboxGroup(["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
                                value=[],  # no keys selected for removal by default
                                label="Remove Keys", info="Which keys to remove?")
            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)            
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            ex_uuid = gr.Text(label="UUID", visible=False)
        with gr.Column(scale=3):
            output = gr.Textbox(label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)", interactive=False).style(show_copy_button=True)
            download_btn = gr.Button("Generate Bib File", visible=False)
            download_content = gr.File(visible=False)
    def download_file(ex_uuid):
        # Serve the normalized .bib file that process() wrote for this session's UUID,
        # and reveal the (initially hidden) file component so it can be downloaded.
        file_path = f"output_{ex_uuid}.bib"
        return gr.update(value=file_path, visible=True)
    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name="process")
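    # Setting api_name="process" also exposes this handler as a named API endpoint
    # (/process); see the commented gradio_client sketch at the bottom of this file.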
    def clean(text):
        return ""
    clr_button.click(clean, input_bib, input_bib)
    # gr.Interface(fn=process,
    # outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
    # examples=examples,
    # allow_flagging="never",
    # scroll_to_output=True,
    # show_progress=True,
    # )


if __name__ == "__main__":
    demo.launch()


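# For reference, the block below shows the same paper before (arXiv preprint) and
# after normalization to its official EMNLP 2020 entry from the ACL Anthology.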
"""
@article{lin2020birds,
    title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
    author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
    journal={arXiv preprint arXiv:2005.00683},
    year={2020}
} 

@inproceedings{lin2020birds,
 address = {Online},
 author = {Lin, Bill Yuchen  and
Lee, Seyeon  and
Khanna, Rahul  and
Ren, Xiang},
 booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
 doi = {10.18653/v1/2020.emnlp-main.557},
 pages = {6862--6868},
 publisher = {Association for Computational Linguistics},
 title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
 url = {https://aclanthology.org/2020.emnlp-main.557},
 year = {2020}
}   
"""