from utils.references import References
from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts
from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
from utils.tex_processing import replace_title
from utils.figures import generate_random_figures
import datetime
import shutil
import time
import logging
import os

# Running token counters, accumulated by `log_usage` across every request
# made during the lifetime of the process.
TOTAL_TOKENS = 0
TOTAL_PROMPTS_TOKENS = 0
TOTAL_COMPLETION_TOKENS = 0


def make_archive(source, destination):
    """Archive the folder `source` and move the archive to `destination`.

    The archive format is taken from the extension of `destination`
    (e.g. ``"output.zip"`` -> ``zip``). The archive is first created in the
    current working directory and then moved to `destination`.

    :param source: path of the folder to archive.
    :param destination: path (including extension) of the resulting archive.
    :return: `destination`.
    :raises ValueError: (from `shutil.make_archive`) if the extension of
        `destination` is not a registered archive format.
    """
    # splitext only splits on the last dot, so names such as "my.paper.zip"
    # still resolve to the "zip" format.
    stem, extension = os.path.splitext(os.path.basename(destination))
    archive_format = extension.lstrip(".")

    # Drop trailing separators first so dirname/basename agree on what the
    # final path component is ("outputs/x/" -> root "outputs", base "x").
    folder = source.rstrip(os.sep)
    root_dir = os.path.dirname(folder) or os.curdir
    base_dir = os.path.basename(folder)

    # Use the path reported by make_archive instead of rebuilding it by hand.
    created = shutil.make_archive(stem, archive_format, root_dir, base_dir)
    shutil.move(created, destination)
    return destination


def log_usage(usage, generating_target, print_out=True):
    """Add one request's token usage to the global counters and log it.

    :param usage: mapping with the keys ``prompt_tokens``,
        ``completion_tokens`` and ``total_tokens``.
    :param generating_target: label of what was generated; only used in the
        log message.
    :param print_out: when true, the message is also printed to stdout.
    """
    global TOTAL_TOKENS
    global TOTAL_PROMPTS_TOKENS
    global TOTAL_COMPLETION_TOKENS

    prompts_tokens = usage['prompt_tokens']
    completion_tokens = usage['completion_tokens']
    total_tokens = usage['total_tokens']

    TOTAL_TOKENS += total_tokens
    TOTAL_PROMPTS_TOKENS += prompts_tokens
    TOTAL_COMPLETION_TOKENS += completion_tokens

    message = (
        f"For generating {generating_target}, {total_tokens} tokens have been used "
        f"({prompts_tokens} for prompts; {completion_tokens} for completion). "
        f"{TOTAL_TOKENS} tokens have been used in total."
    )
    if print_out:
        print(message)
    logging.info(message)


def pipeline(paper, section, save_to_path, model):
    """Generate one section of the paper and save it as a .tex file.

    Steps:
        1. Generate prompts.
        2. Get responses from the AI assistant.
        3. Extract the section text and store it in ``paper["body"][section]``.
        4. Write the text to ``<save_to_path><section>.tex``.

    :param paper: dict describing the paper; its ``"body"`` entry is updated
        in place with the generated text.
    :param section: name of the section; ``"abstract"`` is wrapped in an
        abstract environment, every other name gets a ``\\section`` heading.
    :param save_to_path: output folder prefix; it is concatenated directly
        with the file name, so it must end with a path separator.
    :param model: model identifier forwarded to `get_responses`.
    :return: the usage object returned by `get_responses`.
    """
    print(f"Generating {section}...")
    prompts = generate_paper_prompts(paper, section)
    gpt_response, usage = get_responses(prompts, model)
    output = extract_responses(gpt_response)
    paper["body"][section] = output

    tex_file = save_to_path + f"{section}.tex"
    if section == "abstract":
        content = r"\begin{abstract}" + output + r"\end{abstract}"
    else:
        # The backslash is escaped explicitly: "\s" is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        content = f"\\section{{{section}}}\n" + output
    # Single write with an explicit encoding, so non-ASCII characters in the
    # generated text do not depend on the platform's default encoding.
    with open(tex_file, "w", encoding="utf-8") as f:
        f.write(content)

    # Pause between sections; presumably to stay under the API rate limit.
    time.sleep(5)
    print(f"{section} has been generated. Saved to {tex_file}.")
    return usage


def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
    """Generate a complete paper draft and pack it into ``output.zip``.

    Steps:
        1. Copy the LaTeX template to a fresh, timestamped output folder.
        2. Create references from generated keywords.
        3. Generate a comparison figure and each section using `pipeline`.
        4. Archive the output folder.

    :param title: title of the paper.
    :param description: optional description forwarded to the prompts.
    :param template: name of the folder under ``latex_templates/`` to copy.
    :param model: model identifier forwarded to `get_responses`.
    :return: path of the created archive (``"output.zip"``).
    """
    paper = {}
    paper_body = {}

    # Create a copy in the outputs folder.
    # todo: use copy_templates function instead.
    now = datetime.datetime.now()
    target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
    source_folder = f"latex_templates/{template}"
    destination_folder = f"outputs/{target_name}"
    shutil.copytree(source_folder, destination_folder)

    bibtex_path = destination_folder + "/ref.bib"
    # Trailing slash is required: file names are appended by plain string
    # concatenation below and in `pipeline`.
    save_to_path = destination_folder + "/"
    replace_title(save_to_path, title)
    # NOTE: basicConfig does nothing when the root logger already has
    # handlers, so only the first call in a process selects the log file.
    logging.basicConfig(level=logging.INFO, filename=save_to_path + "generation.log")

    # Generate keywords and references
    print("Initialize the paper information ...")
    prompts = generate_keywords_prompts(title, description)
    gpt_response, usage = get_responses(prompts, model)
    keywords = extract_keywords(gpt_response)
    log_usage(usage, "keywords")

    ref = References(load_papers="")  # todo: allow users to upload bibfile.
    ref.collect_papers(keywords, method="arxiv")  # todo: add more methods to find related papers
    all_paper_ids = ref.to_bibtex(bibtex_path)  # todo: this will used to check if all citations are in this list
    print(f"The paper information has been initialized. References are saved to {bibtex_path}.")

    paper["title"] = title
    paper["description"] = description
    paper["references"] = ref.to_prompts()  # todo: see if this prompts can be compressed.
    paper["body"] = paper_body
    paper["bibtex"] = bibtex_path

    print("Generating figures ...")
    prompts = generate_experiments_prompts(paper)
    gpt_response, usage = get_responses(prompts, model)
    list_of_methods = list(extract_json(gpt_response))
    log_usage(usage, "figures")
    generate_random_figures(list_of_methods, save_to_path + "comparison.png")

    # The abstract is generated last; `pipeline` stores every finished
    # section in paper["body"] before the next one is requested.
    for section in ["introduction", "related works", "backgrounds", "methodology",
                    "experiments", "conclusion", "abstract"]:
        try:
            usage = pipeline(paper, section, save_to_path, model=model)
            log_usage(usage, section)
        except Exception as e:
            # Best effort: one failed section must not abort the whole draft,
            # but keep the traceback in the log for later diagnosis.
            print(f"Failed to generate {section} due to the error: {e}")
            logging.exception("Failed to generate %s", section)

    print(f"The paper {title} has been generated. Saved to {save_to_path}.")
    return make_archive(destination_folder, "output.zip")


if __name__ == "__main__":
    # title = "Training Adversarial Generative Neural Network with Adaptive Dropout Rate"
    title = "Playing Atari Game with Deep Reinforcement Learning"
    description = ""
    template = "ICLR2022"
    model = "gpt-4"
    # model = "gpt-3.5-turbo"
    generate_draft(title, description, template, model)