sc_ma committed on
Commit
ae495a3
1 Parent(s): 6fe5041

Bug fix. Put all generators in auto_backgrounds.py.

Browse files
Files changed (6) hide show
  1. app.py +23 -16
  2. auto_backgrounds.py +37 -5
  3. auto_draft.py +145 -145
  4. requirements.txt +0 -0
  5. section_generator.py +39 -2
  6. utils/storage.py +38 -31
app.py CHANGED
@@ -1,14 +1,14 @@
1
  import gradio as gr
2
  import os
3
  import openai
4
- from auto_backgrounds import generate_backgrounds, fake_generator
5
- from auto_draft import generate_draft
6
 
7
  # todo:
8
  # 1. update README.md and introduction in app.py
9
  # 2. update QQ group and Organization cards
10
- # 3. update autodraft.py to generate a whole paper
11
  # 4. add auto_polishing function
 
12
 
13
  openai_key = os.getenv("OPENAI_API_KEY")
14
  access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
@@ -44,16 +44,22 @@ def wrapped_generator(title, description, openai_key = None,
44
  # if so, download from the cloud storage, return it
45
  # if not, generate the result.
46
  if generator is None:
47
- generator = generate_backgrounds
 
 
 
48
  if openai_key is not None:
49
  openai.api_key = openai_key
50
  openai.Model.list()
51
 
52
  if cache_mode:
53
- from utils.storage import list_all_files, hash_name, download_file, upload_file
54
  # check if "title"+"description" have been generated before
55
- file_name = hash_name(title, description) + ".zip"
 
 
56
  file_list = list_all_files()
 
57
  if file_name in file_list:
58
  # download from the cloud storage, return it
59
  download_file(file_name)
@@ -61,12 +67,12 @@ def wrapped_generator(title, description, openai_key = None,
61
  else:
62
  # generate the result.
63
  # output = fake_generate_backgrounds(title, description, openai_key) # todo: use `generator` to control which function to use.
64
- output = generate_backgrounds(title, description, template, "gpt-4")
65
- upload_file(file_name)
66
  return output
67
  else:
68
  # output = fake_generate_backgrounds(title, description, openai_key)
69
- output = generate_backgrounds(title, description, template, "gpt-4")
70
  return output
71
 
72
 
@@ -80,21 +86,22 @@ with gr.Blocks(theme=theme) as demo:
80
  gr.Markdown('''
81
  # Auto-Draft: 文献整理辅助工具
82
 
83
- 本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_backgrounds功能的测试。通过输入一个领域的名称(比如Deep Reinforcement Learning),即可自动对这个领域的相关文献进行归纳总结.
84
 
85
- ***2023-05-03 Update***: 在这个版本中为大家提供了输入OpenAI API Key的地址, 如果有GPT-4的API KEY的话可以在这里体验!
86
  我也会在近期提供一定的免费体验在这个Huggingface Organization里: [AUTO-ACADEMIC](https://huggingface.co/organizations/auto-academic/share/HPjgazDSlkwLNCWKiAiZoYtXaJIatkWDYM).
87
  如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***.
88
 
89
  ## 用法
90
 
91
- 输入一个领域的名称(比如Deep Reinforcement Learning), 点击Submit, 等待大概十分钟, 下载.zip格式的输出,在Overleaf上编译浏览.
92
  ''')
93
  with gr.Row():
94
  with gr.Column(scale=2):
95
  key = gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key", visible=not IS_OPENAI_API_KEY_AVAILABLE)
96
- title = gr.Textbox(value="Deep Reinforcement Learning", lines=1, max_lines=1, label="Title")
97
- description = gr.Textbox(lines=5, label="Description (Optional)")
 
98
 
99
  with gr.Row():
100
  clear_button = gr.Button("Clear")
@@ -104,8 +111,8 @@ with gr.Blocks(theme=theme) as demo:
104
  availability_mapping = {True: "AVAILABLE", False: "NOT AVAILABLE"}
105
  gr.Markdown(f'''## Huggingface Space Status
106
  当`OpenAI API`显示AVAILABLE的时候这个Space可以直接使用.
107
- 当`OpenAI API`显示NOT AVAILABLE的时候这个Space可以通过在左侧输入OPENAI KEY来使用. 需要有GPT-4的API权限, 不然会报错.
108
- 当`Cache`显示AVAILABLE的时候, 所有的输入和输出会被备份到我的云储存中. 显示NOT AVAILABLE的时候可以正常使用.
109
  `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availability_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>. `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availability_mapping[IS_CACHE_AVAILABLE]}</span>.''')
110
  file_output = gr.File(label="Output")
111
 
 
1
  import gradio as gr
2
  import os
3
  import openai
4
+ from auto_backgrounds import generate_backgrounds, fake_generator, generate_draft
5
+ from utils.file_operations import hash_name
6
 
7
  # todo:
8
  # 1. update README.md and introduction in app.py
9
  # 2. update QQ group and Organization cards
 
10
  # 4. add auto_polishing function
11
+ # 5. Use Completion to handle some simple tasks (including: writing the abstract and conclusion, generating keywords, generating figures...)
12
 
13
  openai_key = os.getenv("OPENAI_API_KEY")
14
  access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
 
44
  # if so, download from the cloud storage, return it
45
  # if not, generate the result.
46
  if generator is None:
47
+ # todo: add a Dropdown to select which generator to use.
48
+ # generator = generate_backgrounds
49
+ # generator = generate_draft
50
+ generator = fake_generator
51
  if openai_key is not None:
52
  openai.api_key = openai_key
53
  openai.Model.list()
54
 
55
  if cache_mode:
56
+ from utils.storage import list_all_files, download_file, upload_file
57
  # check if "title"+"description" have been generated before
58
+
59
+ input_dict = {"title": title, "description": description, "generator": "generate_draft"} #todo: modify here also
60
+ file_name = hash_name(input_dict) + ".zip"
61
  file_list = list_all_files()
62
+ # print(f"{file_name} will be generated. Check the file list {file_list}")
63
  if file_name in file_list:
64
  # download from the cloud storage, return it
65
  download_file(file_name)
 
67
  else:
68
  # generate the result.
69
  # output = fake_generate_backgrounds(title, description, openai_key) # todo: use `generator` to control which function to use.
70
+ output = generator(title, description, template, "gpt-4")
71
+ upload_file(output)
72
  return output
73
  else:
74
  # output = fake_generate_backgrounds(title, description, openai_key)
75
+ output = generator(title, description, template, "gpt-4")
76
  return output
77
 
78
 
 
86
  gr.Markdown('''
87
  # Auto-Draft: 文献整理辅助工具
88
 
89
+ 本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_draft功能的测试。通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板.
90
 
91
+ ***2023-05-03 Update***: 在公开版本中为大家提供了输入OpenAI API Key的地址, 如果有GPT-4的API KEY的话可以在这里体验!
92
  我也会在近期提供一定的免费体验在这个Huggingface Organization里: [AUTO-ACADEMIC](https://huggingface.co/organizations/auto-academic/share/HPjgazDSlkwLNCWKiAiZoYtXaJIatkWDYM).
93
  如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***.
94
 
95
  ## 用法
96
 
97
+ 输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning), 点击Submit, 等待大概十分钟, 下载.zip格式的输出,在Overleaf上编译浏览.
98
  ''')
99
  with gr.Row():
100
  with gr.Column(scale=2):
101
  key = gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key", visible=not IS_OPENAI_API_KEY_AVAILABLE)
102
+ # generator = gr.Dropdown(choices=["学术论文", "文献总结"], value="文献总结", label="Selection", info="目前支持生成'学术论文'和'文献总结'.", interactive=True)
103
+ title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1, label="Title", info="论文标题")
104
+ description = gr.Textbox(lines=5, label="Description (Optional)", visible=False)
105
 
106
  with gr.Row():
107
  clear_button = gr.Button("Clear")
 
111
  availability_mapping = {True: "AVAILABLE", False: "NOT AVAILABLE"}
112
  gr.Markdown(f'''## Huggingface Space Status
113
  当`OpenAI API`显示AVAILABLE的时候这个Space可以直接使用.
114
+ 当`OpenAI API`显示NOT AVAILABLE的时候这个Space可以通过在左侧输入OPENAI KEY来使用. 需要有GPT-4的API权限.
115
+ 当`Cache`显示AVAILABLE的时候, 所有的输入和输出会被备份到我的云储存中. 显示NOT AVAILABLE的时候不影响实际使用.
116
  `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availability_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>. `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availability_mapping[IS_CACHE_AVAILABLE]}</span>.''')
117
  file_output = gr.File(label="Output")
118
 
auto_backgrounds.py CHANGED
@@ -1,12 +1,13 @@
1
  from utils.references import References
2
  from utils.file_operations import hash_name, make_archive, copy_templates
3
- from section_generator import section_generation_bg, keywords_generation
4
  import logging
5
 
6
  TOTAL_TOKENS = 0
7
  TOTAL_PROMPTS_TOKENS = 0
8
  TOTAL_COMPLETION_TOKENS = 0
9
 
 
10
  def log_usage(usage, generating_target, print_out=True):
11
  global TOTAL_TOKENS
12
  global TOTAL_PROMPTS_TOKENS
@@ -26,7 +27,7 @@ def log_usage(usage, generating_target, print_out=True):
26
  print(message)
27
  logging.info(message)
28
 
29
- def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
30
  paper = {}
31
  paper_body = {}
32
 
@@ -41,9 +42,9 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
41
  print(f"keywords: {keywords}")
42
  log_usage(usage, "keywords")
43
 
44
- ref = References(load_papers = "")
45
  ref.collect_papers(keywords, method="arxiv")
46
- all_paper_ids = ref.to_bibtex(bibtex_path) #todo: this will used to check if all citations are in this list
47
 
48
  print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
49
 
@@ -52,6 +53,12 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
52
  paper["references"] = ref.to_prompts()
53
  paper["body"] = paper_body
54
  paper["bibtex"] = bibtex_path
 
 
 
 
 
 
55
 
56
  for section in ["introduction", "related works", "backgrounds"]:
57
  try:
@@ -70,6 +77,31 @@ def fake_generator(title, description="", template="ICLR2022", model="gpt-4"):
70
  """
71
  This function is used to test the whole pipeline without calling OpenAI API.
72
  """
73
- input_dict = {"title": title, "description": description, "generator": "generate_backgrounds"}
74
  filename = hash_name(input_dict) + ".zip"
75
  return make_archive("sample-output.pdf", filename)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from utils.references import References
2
  from utils.file_operations import hash_name, make_archive, copy_templates
3
+ from section_generator import section_generation_bg, keywords_generation, figures_generation, section_generation
4
  import logging
5
 
6
  TOTAL_TOKENS = 0
7
  TOTAL_PROMPTS_TOKENS = 0
8
  TOTAL_COMPLETION_TOKENS = 0
9
 
10
+
11
  def log_usage(usage, generating_target, print_out=True):
12
  global TOTAL_TOKENS
13
  global TOTAL_PROMPTS_TOKENS
 
27
  print(message)
28
  logging.info(message)
29
 
30
+ def _generation_setup(title, description="", template="ICLR2022", model="gpt-4"):
31
  paper = {}
32
  paper_body = {}
33
 
 
42
  print(f"keywords: {keywords}")
43
  log_usage(usage, "keywords")
44
 
45
+ ref = References(load_papers="")
46
  ref.collect_papers(keywords, method="arxiv")
47
+ all_paper_ids = ref.to_bibtex(bibtex_path) # todo: this will used to check if all citations are in this list
48
 
49
  print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
50
 
 
53
  paper["references"] = ref.to_prompts()
54
  paper["body"] = paper_body
55
  paper["bibtex"] = bibtex_path
56
+ return paper, destination_folder, all_paper_ids
57
+
58
+
59
+
60
+ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
61
+ paper, destination_folder, _ = _generation_setup(title, description, template, model)
62
 
63
  for section in ["introduction", "related works", "backgrounds"]:
64
  try:
 
77
  """
78
  This function is used to test the whole pipeline without calling OpenAI API.
79
  """
80
+ input_dict = {"title": title, "description": description, "generator": "generate_draft"}
81
  filename = hash_name(input_dict) + ".zip"
82
  return make_archive("sample-output.pdf", filename)
83
+
84
+
85
def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
    """
    Generate a full paper draft and pack the output folder into an archive.

    The shared setup (`_generation_setup`) prepares the `paper` dict and the
    destination folder. Figures are generated first, then each section in turn.
    A section that fails is reported on stdout and skipped, so one failure does
    not abort the whole draft.

    :param title: title of the paper to generate.
    :param description: optional free-text description of the paper.
    :param template: name of the LaTeX template, forwarded to the setup step.
    :param model: model name used for the text sections.
    :return: whatever `make_archive` returns for the destination folder.
    """
    paper, destination_folder, _ = _generation_setup(title, description, template, model)

    # Figures always use "gpt-3.5-turbo", independent of `model`.
    print("Generating figures ...")
    figures_usage = figures_generation(paper, destination_folder, model="gpt-3.5-turbo")
    log_usage(figures_usage, "figures")

    # NOTE: "methodology" is not in this list; it only appears in an earlier,
    # commented-out version of the section list.
    section_names = (
        "introduction",
        "related works",
        "backgrounds",
        "experiments",
        "conclusion",
        "abstract",
    )
    for section in section_names:
        try:
            section_usage = section_generation(paper, section, destination_folder, model=model)
            log_usage(section_usage, section)
        except Exception as e:
            print(f"Failed to generate {section} due to the error: {e}")

    # The archive name is derived from the inputs and the generator name.
    archive_name = hash_name(
        {"title": title, "description": description, "generator": "generate_draft"}
    ) + ".zip"
    return make_archive(destination_folder, archive_name)
auto_draft.py CHANGED
@@ -1,145 +1,145 @@
1
- from utils.references import References
2
- from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts
3
- from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
4
- from utils.tex_processing import replace_title
5
- from utils.figures import generate_random_figures
6
- import datetime
7
- import shutil
8
- import time
9
- import logging
10
- import os
11
-
12
- TOTAL_TOKENS = 0
13
- TOTAL_PROMPTS_TOKENS = 0
14
- TOTAL_COMPLETION_TOKENS = 0
15
-
16
- def make_archive(source, destination):
17
- base = os.path.basename(destination)
18
- name = base.split('.')[0]
19
- format = base.split('.')[1]
20
- archive_from = os.path.dirname(source)
21
- archive_to = os.path.basename(source.strip(os.sep))
22
- shutil.make_archive(name, format, archive_from, archive_to)
23
- shutil.move('%s.%s'%(name,format), destination)
24
- return destination
25
-
26
-
27
- def log_usage(usage, generating_target, print_out=True):
28
- global TOTAL_TOKENS
29
- global TOTAL_PROMPTS_TOKENS
30
- global TOTAL_COMPLETION_TOKENS
31
-
32
- prompts_tokens = usage['prompt_tokens']
33
- completion_tokens = usage['completion_tokens']
34
- total_tokens = usage['total_tokens']
35
-
36
- TOTAL_TOKENS += total_tokens
37
- TOTAL_PROMPTS_TOKENS += prompts_tokens
38
- TOTAL_COMPLETION_TOKENS += completion_tokens
39
-
40
- message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
41
- f"{TOTAL_TOKENS} tokens have been used in total."
42
- if print_out:
43
- print(message)
44
- logging.info(message)
45
-
46
- def pipeline(paper, section, save_to_path, model):
47
- """
48
- The main pipeline of generating a section.
49
- 1. Generate prompts.
50
- 2. Get responses from AI assistant.
51
- 3. Extract the section text.
52
- 4. Save the text to .tex file.
53
- :return usage
54
- """
55
- print(f"Generating {section}...")
56
- prompts = generate_paper_prompts(paper, section)
57
- gpt_response, usage = get_responses(prompts, model)
58
- output = extract_responses(gpt_response)
59
- paper["body"][section] = output
60
- tex_file = save_to_path + f"{section}.tex"
61
- if section == "abstract":
62
- with open(tex_file, "w") as f:
63
- f.write(r"\begin{abstract}")
64
- with open(tex_file, "a") as f:
65
- f.write(output)
66
- with open(tex_file, "a") as f:
67
- f.write(r"\end{abstract}")
68
- else:
69
- with open(tex_file, "w") as f:
70
- f.write(f"\section{{{section}}}\n")
71
- with open(tex_file, "a") as f:
72
- f.write(output)
73
- time.sleep(5)
74
- print(f"{section} has been generated. Saved to {tex_file}.")
75
- return usage
76
-
77
-
78
-
79
- def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
80
- """
81
- The main pipeline of generating a paper.
82
- 1. Copy everything to the output folder.
83
- 2. Create references.
84
- 3. Generate each section using `pipeline`.
85
- 4. Post-processing: check common errors, fill the title, ...
86
- """
87
- paper = {}
88
- paper_body = {}
89
-
90
- # Create a copy in the outputs folder.
91
- # todo: use copy_templates function instead.
92
- now = datetime.datetime.now()
93
- target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
94
- source_folder = f"latex_templates/{template}"
95
- destination_folder = f"outputs/{target_name}"
96
- shutil.copytree(source_folder, destination_folder)
97
-
98
- bibtex_path = destination_folder + "/ref.bib"
99
- save_to_path = destination_folder +"/"
100
- replace_title(save_to_path, title)
101
- logging.basicConfig( level=logging.INFO, filename=save_to_path+"generation.log")
102
-
103
- # Generate keywords and references
104
- print("Initialize the paper information ...")
105
- prompts = generate_keywords_prompts(title, description)
106
- gpt_response, usage = get_responses(prompts, model)
107
- keywords = extract_keywords(gpt_response)
108
- log_usage(usage, "keywords")
109
- ref = References(load_papers = "") #todo: allow users to upload bibfile.
110
- ref.collect_papers(keywords, method="arxiv") #todo: add more methods to find related papers
111
- all_paper_ids = ref.to_bibtex(bibtex_path) #todo: this will used to check if all citations are in this list
112
-
113
- print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
114
-
115
- paper["title"] = title
116
- paper["description"] = description
117
- paper["references"] = ref.to_prompts() #todo: see if this prompts can be compressed.
118
- paper["body"] = paper_body
119
- paper["bibtex"] = bibtex_path
120
-
121
- print("Generating figures ...")
122
- prompts = generate_experiments_prompts(paper)
123
- gpt_response, usage = get_responses(prompts, model)
124
- list_of_methods = list(extract_json(gpt_response))
125
- log_usage(usage, "figures")
126
- generate_random_figures(list_of_methods, save_to_path + "comparison.png")
127
-
128
- for section in ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]:
129
- try:
130
- usage = pipeline(paper, section, save_to_path, model=model)
131
- log_usage(usage, section)
132
- except Exception as e:
133
- print(f"Failed to generate {section} due to the error: {e}")
134
- print(f"The paper {title} has been generated. Saved to {save_to_path}.")
135
- return make_archive(destination_folder, "output.zip")
136
-
137
- if __name__ == "__main__":
138
- # title = "Training Adversarial Generative Neural Network with Adaptive Dropout Rate"
139
- title = "Playing Atari Game with Deep Reinforcement Learning"
140
- description = ""
141
- template = "ICLR2022"
142
- model = "gpt-4"
143
- # model = "gpt-3.5-turbo"
144
-
145
- generate_draft(title, description, template, model)
 
1
+ # from utils.references import References
2
+ # from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts
3
+ # from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
4
+ # from utils.tex_processing import replace_title
5
+ # from utils.figures import generate_random_figures
6
+ # import datetime
7
+ # import shutil
8
+ # import time
9
+ # import logging
10
+ # import os
11
+ #
12
+ # TOTAL_TOKENS = 0
13
+ # TOTAL_PROMPTS_TOKENS = 0
14
+ # TOTAL_COMPLETION_TOKENS = 0
15
+ #
16
+ # def make_archive(source, destination):
17
+ # base = os.path.basename(destination)
18
+ # name = base.split('.')[0]
19
+ # format = base.split('.')[1]
20
+ # archive_from = os.path.dirname(source)
21
+ # archive_to = os.path.basename(source.strip(os.sep))
22
+ # shutil.make_archive(name, format, archive_from, archive_to)
23
+ # shutil.move('%s.%s'%(name,format), destination)
24
+ # return destination
25
+ #
26
+ #
27
+ # def log_usage(usage, generating_target, print_out=True):
28
+ # global TOTAL_TOKENS
29
+ # global TOTAL_PROMPTS_TOKENS
30
+ # global TOTAL_COMPLETION_TOKENS
31
+ #
32
+ # prompts_tokens = usage['prompt_tokens']
33
+ # completion_tokens = usage['completion_tokens']
34
+ # total_tokens = usage['total_tokens']
35
+ #
36
+ # TOTAL_TOKENS += total_tokens
37
+ # TOTAL_PROMPTS_TOKENS += prompts_tokens
38
+ # TOTAL_COMPLETION_TOKENS += completion_tokens
39
+ #
40
+ # message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
41
+ # f"{TOTAL_TOKENS} tokens have been used in total."
42
+ # if print_out:
43
+ # print(message)
44
+ # logging.info(message)
45
+ #
46
+ # def pipeline(paper, section, save_to_path, model):
47
+ # """
48
+ # The main pipeline of generating a section.
49
+ # 1. Generate prompts.
50
+ # 2. Get responses from AI assistant.
51
+ # 3. Extract the section text.
52
+ # 4. Save the text to .tex file.
53
+ # :return usage
54
+ # """
55
+ # print(f"Generating {section}...")
56
+ # prompts = generate_paper_prompts(paper, section)
57
+ # gpt_response, usage = get_responses(prompts, model)
58
+ # output = extract_responses(gpt_response)
59
+ # paper["body"][section] = output
60
+ # tex_file = save_to_path + f"{section}.tex"
61
+ # if section == "abstract":
62
+ # with open(tex_file, "w") as f:
63
+ # f.write(r"\begin{abstract}")
64
+ # with open(tex_file, "a") as f:
65
+ # f.write(output)
66
+ # with open(tex_file, "a") as f:
67
+ # f.write(r"\end{abstract}")
68
+ # else:
69
+ # with open(tex_file, "w") as f:
70
+ # f.write(f"\section{{{section}}}\n")
71
+ # with open(tex_file, "a") as f:
72
+ # f.write(output)
73
+ # time.sleep(5)
74
+ # print(f"{section} has been generated. Saved to {tex_file}.")
75
+ # return usage
76
+ #
77
+ #
78
+ #
79
+ # def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
80
+ # """
81
+ # The main pipeline of generating a paper.
82
+ # 1. Copy everything to the output folder.
83
+ # 2. Create references.
84
+ # 3. Generate each section using `pipeline`.
85
+ # 4. Post-processing: check common errors, fill the title, ...
86
+ # """
87
+ # paper = {}
88
+ # paper_body = {}
89
+ #
90
+ # # Create a copy in the outputs folder.
91
+ # # todo: use copy_templates function instead.
92
+ # now = datetime.datetime.now()
93
+ # target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
94
+ # source_folder = f"latex_templates/{template}"
95
+ # destination_folder = f"outputs/{target_name}"
96
+ # shutil.copytree(source_folder, destination_folder)
97
+ #
98
+ # bibtex_path = destination_folder + "/ref.bib"
99
+ # save_to_path = destination_folder +"/"
100
+ # replace_title(save_to_path, title)
101
+ # logging.basicConfig( level=logging.INFO, filename=save_to_path+"generation.log")
102
+ #
103
+ # # Generate keywords and references
104
+ # print("Initialize the paper information ...")
105
+ # prompts = generate_keywords_prompts(title, description)
106
+ # gpt_response, usage = get_responses(prompts, model)
107
+ # keywords = extract_keywords(gpt_response)
108
+ # log_usage(usage, "keywords")
109
+ # ref = References(load_papers = "") #todo: allow users to upload bibfile.
110
+ # ref.collect_papers(keywords, method="arxiv") #todo: add more methods to find related papers
111
+ # all_paper_ids = ref.to_bibtex(bibtex_path) #todo: this will used to check if all citations are in this list
112
+ #
113
+ # print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
114
+ #
115
+ # paper["title"] = title
116
+ # paper["description"] = description
117
+ # paper["references"] = ref.to_prompts() #todo: see if this prompts can be compressed.
118
+ # paper["body"] = paper_body
119
+ # paper["bibtex"] = bibtex_path
120
+ #
121
+ # print("Generating figures ...")
122
+ # prompts = generate_experiments_prompts(paper)
123
+ # gpt_response, usage = get_responses(prompts, model)
124
+ # list_of_methods = list(extract_json(gpt_response))
125
+ # log_usage(usage, "figures")
126
+ # generate_random_figures(list_of_methods, save_to_path + "comparison.png")
127
+ #
128
+ # for section in ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]:
129
+ # try:
130
+ # usage = pipeline(paper, section, save_to_path, model=model)
131
+ # log_usage(usage, section)
132
+ # except Exception as e:
133
+ # print(f"Failed to generate {section} due to the error: {e}")
134
+ # print(f"The paper {title} has been generated. Saved to {save_to_path}.")
135
+ # return make_archive(destination_folder, "output.zip")
136
+ #
137
+ # if __name__ == "__main__":
138
+ # # title = "Training Adversarial Generative Neural Network with Adaptive Dropout Rate"
139
+ # title = "Playing Atari Game with Deep Reinforcement Learning"
140
+ # description = ""
141
+ # template = "ICLR2022"
142
+ # model = "gpt-4"
143
+ # # model = "gpt-3.5-turbo"
144
+ #
145
+ # generate_draft(title, description, template, model)
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
section_generator.py CHANGED
@@ -1,5 +1,6 @@
1
  from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts, generate_bg_summary_prompts
2
  from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
 
3
  import time
4
  import os
5
 
@@ -43,6 +44,38 @@ def section_generation_bg(paper, section, save_to_path, model):
43
  return usage
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def keywords_generation(input_dict, model):
47
  title = input_dict.get("title")
48
  description = input_dict.get("description", "")
@@ -54,5 +87,9 @@ def keywords_generation(input_dict, model):
54
  else:
55
  raise ValueError("`input_dict` must include the key 'title'.")
56
 
57
- def figures_generation():
58
- pass
 
 
 
 
 
1
  from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts, generate_bg_summary_prompts
2
  from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
3
+ from utils.figures import generate_random_figures
4
  import time
5
  import os
6
 
 
44
  return usage
45
 
46
 
47
def section_generation(paper, section, save_to_path, model):
    """
    Generate one section of the paper and save it as a .tex file.

    Steps:
      1. Build the prompts for `section` from `paper`.
      2. Get a response from the AI assistant.
      3. Extract the section text from the response.
      4. Store the text in `paper["body"][section]` and write it to
         `<save_to_path>/<section>.tex`.

    The abstract is wrapped in an `abstract` environment; every other section
    is preceded by a `\\section{...}` heading with the upper-cased section name.

    :param paper: dict describing the paper; its "body" entry is updated in place.
    :param section: name of the section, e.g. "introduction" or "abstract".
    :param save_to_path: folder the .tex file is written into.
    :param model: model name forwarded to `get_responses`.
    :return: usage, i.e. the second value returned by `get_responses`.
    """
    print(f"Generating {section}...")
    prompts = generate_paper_prompts(paper, section)
    gpt_response, usage = get_responses(prompts, model)
    output = extract_responses(gpt_response)
    paper["body"][section] = output

    tex_file = os.path.join(save_to_path, f"{section}.tex")
    if section == "abstract":
        content = r"\begin{abstract}" + output + r"\end{abstract}"
    else:
        # The backslash is escaped explicitly: "\s" is an invalid escape
        # sequence in a normal string literal and triggers a warning.
        content = "\\section{" + section.upper() + "}\n" + output

    # Write the file once, as UTF-8, so non-ASCII characters in the generated
    # text do not depend on the platform's default encoding.
    with open(tex_file, "w", encoding="utf-8") as f:
        f.write(content)

    print(f"{section} has been generated. Saved to {tex_file}.")
    return usage
78
+
79
  def keywords_generation(input_dict, model):
80
  title = input_dict.get("title")
81
  description = input_dict.get("description", "")
 
87
  else:
88
  raise ValueError("`input_dict` must include the key 'title'.")
89
 
90
def figures_generation(paper, save_to_path, model):
    """
    Generate the comparison figure for the paper.

    Prompts are built from `paper` and sent to the model. The items extracted
    from the response are passed to `generate_random_figures`, which is given
    `<save_to_path>/comparison.png` as its target path.

    :param paper: dict describing the paper, forwarded to the prompt builder.
    :param save_to_path: folder the figure is saved into.
    :param model: model name forwarded to `get_responses`.
    :return: usage, i.e. the second value returned by `get_responses`.
    """
    figure_path = os.path.join(save_to_path, "comparison.png")
    experiment_prompts = generate_experiments_prompts(paper)
    response, usage = get_responses(experiment_prompts, model)
    # presumably the extracted items are method names -- confirm against extract_json
    methods = [*extract_json(response)]
    generate_random_figures(methods, figure_path)
    return usage
utils/storage.py CHANGED
@@ -1,45 +1,52 @@
1
  # This script `storage.py` is used to handle the cloud storage.
2
  # `upload_file`:
 
 
3
  # `list_all_files`:
 
4
  # `download_file`:
 
5
 
6
  import os
7
  import boto3
8
 
9
- access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
10
- secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
11
- bucket_name = "hf-storage"
12
 
13
- if (access_key_id is not None) and (secret_access_key is not None):
 
 
14
  session = boto3.Session(
15
  aws_access_key_id=access_key_id,
16
  aws_secret_access_key=secret_access_key,
17
  )
18
-
19
  s3 = session.resource('s3')
20
- bucket = s3.Bucket(bucket_name)
21
-
22
-
23
- def upload_file(file_name, target_name=None):
24
- if target_name is None:
25
- target_name = file_name
26
- try:
27
- s3.meta.client.upload_file(Filename=file_name, Bucket=bucket_name, Key=target_name)
28
- print(f"The file {file_name} has been uploaded!")
29
- except:
30
- print("Uploading failed!")
31
-
32
- def list_all_files():
33
- return [obj.key for obj in bucket.objects.all()]
34
-
35
- def download_file(file_name):
36
- ''' Download `file_name` from the bucket.
37
- Bucket (str) – The name of the bucket to download from.
38
- Key (str) The name of the key to download from.
39
- Filename (str) – The path to the file to download to.
40
- '''
41
- try:
42
- s3.meta.client.download_file(Bucket=bucket_name, Key=file_name, Filename=file_name)
43
- print(f"The file {file_name} has been downloaded!")
44
- except:
45
- print("Uploading failed!")
 
 
 
 
 
1
  # This script `storage.py` is used to handle the cloud storage.
2
  # `upload_file`:
3
+ # Function to upload a local file to the specified S3 bucket.
4
+ # If the target_name is not specified, it will use the file_name as the object key.
5
  # `list_all_files`:
6
+ # Function to list all the files in the specified S3 bucket.
7
  # `download_file`:
8
+ # Function to download a file from the specified S3 bucket to the local machine using the specified file_name.
9
 
10
  import os
11
  import boto3
12
 
13
+ BUCKET_NAME = "hf-storage"
 
 
14
 
15
def get_client():
    """
    Build the S3 handles used by the storage helpers.

    Credentials are read from the environment variables `AWS_ACCESS_KEY_ID`
    and `AWS_SECRET_ACCESS_KEY` on every call.

    :return: tuple `(s3, bucket)` with the S3 resource and the `BUCKET_NAME` bucket.
    """
    aws_session = boto3.Session(
        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    )
    s3_resource = aws_session.resource('s3')
    return s3_resource, s3_resource.Bucket(BUCKET_NAME)
25
+
26
def upload_file(file_name, target_name=None):
    """
    Upload a local file to the `BUCKET_NAME` bucket.

    :param file_name: path of the local file to upload.
    :param target_name: object key in the bucket; defaults to `file_name`.
    """
    s3_resource, _ = get_client()
    object_key = file_name if target_name is None else target_name
    s3_resource.meta.client.upload_file(
        Filename=file_name,
        Bucket=BUCKET_NAME,
        Key=object_key,
    )
    print(f"The file {file_name} has been uploaded!")
33
+
34
+
35
def list_all_files():
    """
    List the keys of all objects in the `BUCKET_NAME` bucket.

    :return: list of object keys.
    """
    _, bucket = get_client()
    keys = []
    for stored_object in bucket.objects.all():
        keys.append(stored_object.key)
    return keys
38
+
39
+
40
def download_file(file_name):
    """
    Download `file_name` from the `BUCKET_NAME` bucket.

    The same name is used for the object key in the bucket and for the local
    path the file is written to.

    :param file_name: object key to download, and local path to save it to.
    """
    s3_resource, _ = get_client()
    client = s3_resource.meta.client
    client.download_file(Bucket=BUCKET_NAME, Key=file_name, Filename=file_name)
    print(f"The file {file_name} has been downloaded!")
49
+
50
if __name__ == "__main__":
    # Manual check when run as a script: upload the sample file to the bucket.
    sample_path = "sample-output.pdf"
    upload_file(sample_path)