File size: 14,566 Bytes
0e072c0
 
 
 
9890a3c
0e072c0
 
 
 
 
 
 
 
 
 
 
 
9890a3c
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9890a3c
 
0e072c0
 
 
 
 
 
 
 
 
9890a3c
0e072c0
 
 
 
 
 
 
 
 
9890a3c
 
 
 
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
9890a3c
 
 
 
 
 
 
 
 
 
 
 
0e072c0
 
9890a3c
 
 
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9890a3c
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91d94e0
 
 
0e072c0
 
 
9890a3c
91d94e0
9890a3c
 
91d94e0
 
 
0e072c0
91d94e0
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9890a3c
 
 
0e072c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9890a3c
0e072c0
 
 
 
 
 
 
9890a3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
import gradio as gr
import os
import openai
from auto_backgrounds import generate_backgrounds, generate_draft
from utils.file_operations import hash_name, list_folders
from references_generator import generate_top_k_references

# todo:
#   6. get logs when the procedure is not completed. *
#   7. support a user's own file library; add more prompts
#   8. Decide on how to generate the main part of a paper * (Langchain/AutoGPT)
#   1. Convert the paper representation to pure JSON?
#   2. Implement other features
#   3. Check API Key GPT-4 Support.
#   8. Re-build some components using `langchain`
#           - in `gpt_interation`, use LLM
# future:
#   generation.log sometimes disappears (ignore this)
#   1. Check if there are any duplicated citations
#   2. Remove potential thebibliography and bibitem in .tex file

#######################################################################################################################
# Check if openai and cloud storage available
#######################################################################################################################
# Credentials are read from the environment once, at import time.
openai_key = os.getenv("OPENAI_API_KEY")
access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')

# Caching is enabled only when both AWS credentials are present.
if access_key_id is None or secret_access_key is None:
    print("Access keys are not provided. Outputs cannot be saved to AWS Cloud Storage.\n")
    IS_CACHE_AVAILABLE = False
else:
    IS_CACHE_AVAILABLE = True

# The OpenAI key counts as available only if it is set AND a test request succeeds.
if openai_key is None:
    print("OPENAI_API_KEY is not found in environment variables. The output may not be generated.\n")
    IS_OPENAI_API_KEY_AVAILABLE = False
else:
    openai.api_key = openai_key
    try:
        # Probe request used purely to validate the key.
        openai.Model.list()
    except Exception as e:
        # Startup boundary: any failure (auth, network, ...) marks the key as unusable,
        # but the reason is reported instead of being silently dropped.
        print(f"OPENAI_API_KEY could not be validated ({type(e).__name__}: {e}). "
              "The output may not be generated.\n")
        IS_OPENAI_API_KEY_AVAILABLE = False
    else:
        IS_OPENAI_API_KEY_AVAILABLE = True

# Template names offered in the UI, taken from the folders under `latex_templates`.
ALL_TEMPLATES = list_folders("latex_templates")


def clear_inputs(*args):
    """Return two empty strings, ignoring every argument.

    Wired to the "Clear" button of the paper tab, where the two values reset
    the title and description textboxes.
    """
    blank = ""
    return blank, blank

def clear_inputs_refs(*args):
    """Return the reset values ``("", 5)``, ignoring every argument.

    Wired to the "Clear" button of the reference-search tab, where the values
    reset the title textbox and the reference-count slider.
    """
    cleared_title, default_num_refs = "", 5
    return cleared_title, default_num_refs


def wrapped_generator(paper_title, paper_description, openai_api_key=None,
                      paper_template="ICLR2022", tldr=True, selected_sections=None, bib_refs=None, model="gpt-4",
                      cache_mode=IS_CACHE_AVAILABLE):
    """Generate a paper draft, optionally reusing a result stored in cloud storage.

    Args:
        paper_title: Title of the paper to generate.
        paper_description: Free-text description of the paper.
        openai_api_key: When not None, installed as ``openai.api_key`` and validated
            with a test request before anything else happens.
        paper_template: Forwarded to ``generate_draft`` as ``template``.
        tldr: Forwarded to ``generate_draft``.
        selected_sections: Forwarded to ``generate_draft`` as ``sections``.
        bib_refs: Uploaded file object; only its ``.name`` is forwarded to
            ``generate_draft``. May be None.
        model: Forwarded to ``generate_draft``.
        cache_mode: When true, look the request up in cloud storage first and upload
            freshly generated output. The default is captured when the function is defined.

    Returns:
        The value returned by ``generate_draft``, or the cached ``<hash>.zip`` file name
        when the request was found in cloud storage.

    Raises:
        gr.Error: If the supplied key fails validation or generation/upload fails.
    """
    if bib_refs is not None:
        bib_refs = bib_refs.name
    if openai_api_key is not None:
        openai.api_key = openai_api_key
        try:
            openai.Model.list()
        except Exception as e:
            raise gr.Error(f"Key错误. Error: {e}") from e

    if not cache_mode:
        try:
            return generate_draft(paper_title, paper_description, template=paper_template,
                                  tldr=tldr, sections=selected_sections, bib_refs=bib_refs, model=model)
        except Exception as e:
            raise gr.Error(f"生成失败. Error: {e}") from e

    # Imported lazily so the storage module is only needed when caching is enabled.
    from utils.storage import list_all_files, download_file, upload_file

    # NOTE(review): the cache key covers only title and description, so a request with a
    # different template/model/sections/bib file gets the previously cached archive.
    # Left unchanged here because altering the key would orphan existing cache entries.
    input_dict = {"title": paper_title, "description": paper_description,
                  "generator": "generate_draft"}
    file_name = hash_name(input_dict) + ".zip"
    if file_name in list_all_files():
        # Cache hit: fetch the stored archive and return it.
        download_file(file_name)
        return file_name

    try:
        output = generate_draft(paper_title, paper_description, template=paper_template,
                                tldr=tldr, sections=selected_sections, bib_refs=bib_refs, model=model)
        upload_file(output)
        return output
    except Exception as e:
        # Exception instances have no `__name__`; the class name lives on the type.
        raise gr.Error(f"生成失败. Error {type(e).__name__}: {e}") from e


def wrapped_references_generator(paper_title, num_refs, openai_api_key=None):
    """Return the ``num_refs`` most relevant references for ``paper_title``.

    Args:
        paper_title: Title used as the query for ``generate_top_k_references``.
        num_refs: Forwarded to ``generate_top_k_references`` as ``top_k``.
        openai_api_key: When not None, installed as ``openai.api_key`` and validated
            with a test request first.

    Returns:
        Whatever ``generate_top_k_references`` returns.

    Raises:
        gr.Error: If the supplied key fails validation (same message as
            ``wrapped_generator`` so both tabs report key problems alike).
    """
    if openai_api_key is not None:
        openai.api_key = openai_api_key
        try:
            openai.Model.list()
        except Exception as e:
            raise gr.Error(f"Key错误. Error: {e}") from e
    return generate_top_k_references(paper_title, top_k=num_refs)



# Gradio theme for the whole app: the default theme with the "Questrial" Google font.
# The commented-out `.set(...)` below is a disabled colour override kept for reference.
theme = gr.themes.Default(font=gr.themes.GoogleFont("Questrial"))
# .set(
#     background_fill_primary='#E5E4E2',
#     background_fill_secondary = '#F6F6F6',
#     button_primary_background_fill="#281A39"
# )

# Markdown shown at the top of the paper-generation tab (user-facing text, in Chinese).
ACADEMIC_PAPER = """## 一键生成论文初稿

1. 在Title文本框中输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning). 
2. 点击Submit. 等待大概十五分钟(全文). 
3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览.  
"""


# Markdown shown at the top of the reference-search tab.
REFERENCES = """## 一键搜索相关论文
(此功能已经被整合进一键生成论文初稿)
1. 在Title文本框中输入想要搜索文献的论文(比如Playing Atari with Deep Reinforcement Learning). 
2. 点击Submit. 等待大概十分钟. 
3. 在右侧JSON处会显示相关文献.  
"""

# Markdown shown next to the reference settings in the advanced options.
REFERENCES_INSTRUCTION = """### References
这一行用于定义AI如何选取参考文献. 目前是两种方式混合:
1. GPT自动根据标题生成关键字,使用Semantic Scholar搜索引擎搜索文献,利用Specter获取Paper Embedding来自动选取最相关的文献作为GPT的参考资料.
2. 用户上传bibtex文件,使用Google Scholar搜索摘要作为GPT的参考资料. 
关于有希望利用本地文件来供GPT参考的功能将在未来实装.
"""

# Markdown shown next to the (not yet implemented) domain-knowledge settings.
# The duplicated character in "构建的的" was a typo in the displayed text and is fixed here.
DOMAIN_KNOWLEDGE_INSTRUCTION = """### Domain Knowledge
(暂未实装)
这一行用于定义AI的知识库. 将提供两种选择: 
1. 各个领域内由专家预先收集资料并构建的FAISS向量数据库. 每个数据库内包含了数百万页经过同行评议的论文和专业经典书籍. 
2. 自行构建的使用OpenAI text-embedding-ada-002模型创建的FAISS向量数据库.
"""

# Placeholder heading; not referenced anywhere in the visible part of this file.
OTHERS_INSTRUCTION = """### Others

"""


# Build the Gradio UI. Components are created in page order inside nested layout
# contexts, so the statement order below is significant and is kept as in the original.
with gr.Blocks(theme=theme) as demo:
    # Page header and changelog (user-facing Markdown, in Chinese).
    gr.Markdown('''
    # 分布式航天器系统实验室
    
    ## Ai-Draft: 学术写作辅助工具
    
    通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板.    
    
    ***2023-06-08 Update***: 
    * 目前对英文的生成效果更好. 
    * GPT3.5模型可能会因为Token数不够导致一部分章节为空. 可以在高级设置里减少生成的章节. 
    
    ***2023-05-17 Update***: GPT-4的API Key没有的话界面右端会显示NOT AVAILABLE. 这里为大家提供了一个位置输入OpenAI API Key. 同时也提供了GPT-3.5的兼容(高级设置里设置). 欢迎大家自行体验. 

    请***确认***您的OpenAI Key可以访问被选择的模型(不管您是什么账户类型,其中一些模型都需要额外申请,否则会导致error)
    
    如果有更多想法和建议欢迎联系我,我是分布式航天器系统实验室的小明.
    ''')

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=2):
            # The key textbox is pre-filled with the server-side key and hidden when that
            # key validated at startup; its value is passed to both submit handlers.
            # NOTE(review): confirm that Gradio does not expose the value of a hidden
            # component to the browser, otherwise the server key could leak.
            key = gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key",
                             visible=not IS_OPENAI_API_KEY_AVAILABLE)

            # (disabled) selector between generators, kept for reference:
            # generator = gr.Dropdown(choices=["学术论文", "文献总结"], value="文献总结",
            # label="Selection", info="目前支持生成'学术论文'和'文献总结'.", interactive=True)

            # One tab per feature.
            with gr.Tab("学术论文"):
                gr.Markdown(ACADEMIC_PAPER)

                title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
                                   label="Title", info="论文标题")
                # Advanced settings, collapsed by default.
                with gr.Accordion("高级设置", open=False):
                    with gr.Row():
                        description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
                                                    info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
                        with gr.Row():
                            template = gr.Dropdown(label="Template", choices=ALL_TEMPLATES, value="Default",
                                                   interactive=True,
                                                   info="生成论文的参考模板.")
                            model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"],
                                                          value="gpt-3.5-turbo",
                                                          interactive=True,
                                                          info="生成论文用到的语言模型.")
                        sections = gr.CheckboxGroup(
                            choices=["introduction", "related works", "backgrounds", "methodology", "experiments",
                                     "conclusion", "abstract"],
                            type="value", label="生成章节", interactive=True,
                            value=["introduction", "related works"])

                    # Reference settings: explanation on the left, controls on the right.
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown(REFERENCES_INSTRUCTION)

                        with gr.Column(scale=2):
                            # Hidden and non-interactive; not passed to any handler below.
                            search_engine = gr.Dropdown(label="Search Engine",
                                                        choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
                                                        value="Semantic Scholar",
                                                        interactive=False,
                                                        visible=False,
                                                        info="用于决定GPT用什么搜索引擎来搜索文献. (暂不支持修改)")
                            # Typo fixed in the displayed hint: "筐" -> "框" (checkbox).
                            tldr_checkbox = gr.Checkbox(value=True, label="TLDR;",
                                                        info="选择此框表示将使用Semantic Scholar的TLDR作为文献的总结.",
                                                        interactive=True)
                            gr.Markdown('''
                            上传.bib文件提供AI需要参考的文献. 
                            ''')
                            bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
                                                  interactive=True)

                    # Domain-knowledge settings: both controls are non-interactive
                    # placeholders and are not passed to any handler below.
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown(DOMAIN_KNOWLEDGE_INSTRUCTION)

                        with gr.Column(scale=2):
                            domain_knowledge = gr.Dropdown(label="预载知识库",
                                                        choices=["(None)", "Machine Learning"],
                                                        value="(None)",
                                                        interactive=False,
                                                        info="使用预先构建的知识库. (暂未实装)")
                            local_domain_knowledge = gr.File(label="本地知识库 (暂未实装)", interactive=False)
                with gr.Row():
                    clear_button_pp = gr.Button("Clear")
                    submit_button_pp = gr.Button("Submit", variant="primary")

            with gr.Tab("文献搜索"):
                gr.Markdown(REFERENCES)

                title_refs = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
                                   label="Title", info="论文标题")
                slider_refs = gr.Slider(minimum=1, maximum=100, value=5, step=1,
                                   interactive=True, label="最相关的参考文献数目")
                with gr.Row():
                    clear_button_refs = gr.Button("Clear")
                    submit_button_refs = gr.Button("Submit", variant="primary")

            # Placeholder tabs for features that are not implemented yet.
            with gr.Tab("文献综述 (Coming soon!)"):
                gr.Markdown('''
                <h1  style="text-align: center;">Coming soon!</h1>
                ''')
            with gr.Tab("Github文档 (Coming soon!)"):
                gr.Markdown('''
                <h1  style="text-align: center;">Coming soon!</h1>
                ''')

        # Right column: status badges and outputs.
        with gr.Column(scale=1):
            # Inline CSS and label for each availability flag (True/False).
            style_mapping = {True: "color:white;background-color:green",
                             False: "color:white;background-color:red"}  # todo: to match website's style
            availability_mapping = {True: "AVAILABLE", False: "NOT AVAILABLE"}
            gr.Markdown(f'''## Huggingface Space Status  
             当`OpenAI API`显示AVAILABLE的时候这个Space可以直接使用.    
             当`OpenAI API`显示NOT AVAILABLE的时候这个Space可以通过在左侧输入OPENAI KEY来使用. 需要有GPT-4的API权限. 
             当`Cache`显示AVAILABLE的时候, 所有的输入和输出会被备份到我的云储存中. 显示NOT AVAILABLE的时候不影响实际使用. 
            `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availability_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>.  `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availability_mapping[IS_CACHE_AVAILABLE]}</span>.''')
            file_output = gr.File(label="Output")
            json_output = gr.JSON(label="References")

    # Paper tab wiring. The `inputs` list is positional and must match the parameter
    # order of `wrapped_generator`.
    clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
    submit_button_pp.click(fn=wrapped_generator,
                           inputs=[title, description_pp, key, template, tldr_checkbox, sections, bibtex_file,
                                   model_selection], outputs=file_output)

    # Reference-search tab wiring.
    clear_button_refs.click(fn=clear_inputs_refs, inputs=[title_refs, slider_refs], outputs=[title_refs, slider_refs])
    submit_button_refs.click(fn=wrapped_references_generator,
                           inputs=[title_refs, slider_refs, key], outputs=json_output)

# Enable request queuing, then start the app. This runs at import time (no __main__ guard).
# NOTE(review): per Gradio's `queue()` docs, `concurrency_count` is the number of queue
# workers and `max_size` caps the number of waiting requests — confirm for the pinned version.
demo.queue(concurrency_count=1, max_size=5, api_open=False)
# show_error=True asks Gradio to surface handler errors in the browser.
demo.launch(show_error=True)