Spaces:
Running
Running
File size: 5,346 Bytes
238735e 8aec19e 238735e 1b82d4c 238735e 8aec19e 1b82d4c 238735e 8aec19e 238735e 8aec19e 238735e 1b82d4c 238735e 8aec19e 26d461c 238735e 1b82d4c 238735e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
from utils.references import References
from utils.prompts import generate_bg_keywords_prompts, generate_bg_summary_prompts
from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
from utils.tex_processing import replace_title
import datetime
import shutil
import time
import logging
import os
TOTAL_TOKENS = 0
TOTAL_PROMPTS_TOKENS = 0
TOTAL_COMPLETION_TOKENS = 0
def hash_name(title, description):
'''
For same title and description, it should return the same value.
'''
name = title + description
name = name.lower()
md5 = hashlib.md5()
md5.update(name.encode('utf-8'))
hashed_string = md5.hexdigest()
return hashed_string
def log_usage(usage, generating_target, print_out=True):
global TOTAL_TOKENS
global TOTAL_PROMPTS_TOKENS
global TOTAL_COMPLETION_TOKENS
prompts_tokens = usage['prompt_tokens']
completion_tokens = usage['completion_tokens']
total_tokens = usage['total_tokens']
TOTAL_TOKENS += total_tokens
TOTAL_PROMPTS_TOKENS += prompts_tokens
TOTAL_COMPLETION_TOKENS += completion_tokens
message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
f"{TOTAL_TOKENS} tokens have been used in total."
if print_out:
print(message)
logging.info(message)
def make_archive(source, destination):
base = os.path.basename(destination)
name = base.split('.')[0]
format = base.split('.')[1]
archive_from = os.path.dirname(source)
archive_to = os.path.basename(source.strip(os.sep))
shutil.make_archive(name, format, archive_from, archive_to)
shutil.move('%s.%s'%(name,format), destination)
return destination
def pipeline(paper, section, save_to_path, model, openai_key=None):
"""
The main pipeline of generating a section.
1. Generate prompts.
2. Get responses from AI assistant.
3. Extract the section text.
4. Save the text to .tex file.
:return usage
"""
print(f"Generating {section}...")
prompts = generate_bg_summary_prompts(paper, section)
gpt_response, usage = get_responses(prompts, model)
output = extract_responses(gpt_response)
paper["body"][section] = output
tex_file = save_to_path + f"{section}.tex"
if section == "abstract":
with open(tex_file, "w") as f:
f.write(r"\begin{abstract}")
with open(tex_file, "a") as f:
f.write(output)
with open(tex_file, "a") as f:
f.write(r"\end{abstract}")
else:
with open(tex_file, "w") as f:
f.write(f"\section{{{section.upper()}}}\n")
with open(tex_file, "a") as f:
f.write(output)
time.sleep(5)
print(f"{section} has been generated. Saved to {tex_file}.")
return usage
def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4", openai_key=None):
paper = {}
paper_body = {}
# Create a copy in the outputs folder.
now = datetime.datetime.now()
target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
source_folder = f"latex_templates/{template}"
destination_folder = f"outputs/{target_name}"
shutil.copytree(source_folder, destination_folder)
bibtex_path = destination_folder + "/ref.bib"
save_to_path = destination_folder +"/"
replace_title(save_to_path, "A Survey on " + title)
logging.basicConfig( level=logging.INFO, filename=save_to_path+"generation.log")
# Generate keywords and references
print("Initialize the paper information ...")
prompts = generate_bg_keywords_prompts(title, description)
gpt_response, usage = get_responses(prompts, model)
keywords = extract_keywords(gpt_response)
log_usage(usage, "keywords")
ref = References(load_papers = "")
ref.collect_papers(keywords, method="arxiv")
all_paper_ids = ref.to_bibtex(bibtex_path) #todo: this will used to check if all citations are in this list
print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
paper["title"] = title
paper["description"] = description
paper["references"] = ref.to_prompts() # to_prompts(top_papers)
paper["body"] = paper_body
paper["bibtex"] = bibtex_path
for section in ["introduction", "related works", "backgrounds"]:
try:
usage = pipeline(paper, section, save_to_path, model=model)
log_usage(usage, section)
except Exception as e:
print(f"Failed to generate {section} due to the error: {e}")
print(f"The paper {title} has been generated. Saved to {save_to_path}.")
# shutil.make_archive("output.zip", 'zip', save_to_path)
return make_archive(destination_folder, "output.zip")
def fake_generate_backgrounds(title, description, openai_key = None):
"""
This function is used to test the whole pipeline without calling OpenAI API.
"""
filename = hash_name(title, description) + ".zip"
return make_archive("sample-output.pdf", filename)
if __name__ == "__main__":
title = "Reinforcement Learning"
description = ""
template = "Summary"
model = "gpt-4"
# model = "gpt-3.5-turbo"
generate_backgrounds(title, description, template, model)
|