File size: 5,346 Bytes
238735e
 
 
 
 
 
 
 
8aec19e
238735e
 
 
 
 
 
1b82d4c
 
 
 
 
 
 
 
 
 
 
238735e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aec19e
 
 
 
 
 
 
 
 
 
1b82d4c
238735e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aec19e
238735e
 
8aec19e
238735e
 
 
 
 
1b82d4c
238735e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8aec19e
26d461c
238735e
1b82d4c
 
 
 
 
 
 
 
 
238735e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from utils.references import References
from utils.prompts import generate_bg_keywords_prompts, generate_bg_summary_prompts
from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
from utils.tex_processing import replace_title
import datetime
import shutil
import time
import logging
import os

# Process-wide running token counters. log_usage() adds the usage of every
# call to these totals; nothing in this file resets them.
TOTAL_TOKENS = 0  # sum of usage['total_tokens']
TOTAL_PROMPTS_TOKENS = 0  # sum of usage['prompt_tokens']
TOTAL_COMPLETION_TOKENS = 0  # sum of usage['completion_tokens']


def hash_name(title, description):
    """Return a deterministic MD5 hex digest for a title/description pair.

    The two strings are concatenated and lower-cased before hashing, so the
    result is case-insensitive: the same title and description (ignoring
    letter case) always produce the same 32-character hexadecimal string.

    :param title: paper title (str).
    :param description: paper description (str); may be empty.
    :return: hexadecimal MD5 digest of the lower-cased concatenation.
    """
    # Imported here because the module's top-level import block does not
    # bring hashlib into scope; without this the function raised NameError.
    import hashlib

    name = (title + description).lower()
    # MD5 is used only to derive a stable file name, not for security.
    return hashlib.md5(name.encode('utf-8')).hexdigest()

def log_usage(usage, generating_target, print_out=True):
    """Add one call's token usage to the module-wide totals and report it.

    :param usage: mapping with the keys 'prompt_tokens', 'completion_tokens'
        and 'total_tokens'.
    :param generating_target: label of what was generated; used in the message.
    :param print_out: when true, the message is also printed to stdout.
    :return: None. The message is always sent to ``logging.info``.
    """
    global TOTAL_TOKENS, TOTAL_PROMPTS_TOKENS, TOTAL_COMPLETION_TOKENS

    # Read every counter first so a missing key raises before any total changes.
    used_prompt = usage['prompt_tokens']
    used_completion = usage['completion_tokens']
    used_total = usage['total_tokens']

    TOTAL_TOKENS = TOTAL_TOKENS + used_total
    TOTAL_PROMPTS_TOKENS = TOTAL_PROMPTS_TOKENS + used_prompt
    TOTAL_COMPLETION_TOKENS = TOTAL_COMPLETION_TOKENS + used_completion

    report = (
        f"For generating {generating_target}, {used_total} tokens have been used ({used_prompt} for prompts; {used_completion} for completion). "
        f"{TOTAL_TOKENS} tokens have been used in total."
    )
    if print_out:
        print(report)
    logging.info(report)

def make_archive(source, destination):
    """Pack ``source`` into an archive file located at ``destination``.

    The archive format is the extension of ``destination`` (the text after
    its last dot, e.g. ``zip`` or ``tar``) and is handed to
    ``shutil.make_archive``. The last path component of ``source`` becomes
    the top-level entry inside the archive.

    :param source: path of the file or directory to pack; a trailing path
        separator is tolerated.
    :param destination: path of the archive to create, including extension.
    :return: ``destination``, unchanged.
    :raises ValueError: if ``destination`` has no extension, or the extension
        is not a format known to ``shutil.make_archive``.
    """
    # splitext keeps extra dots in the stem ("a.b.zip" -> "a.b" + ".zip").
    stem, extension = os.path.splitext(destination)
    archive_format = extension.lstrip(".")
    if not archive_format:
        raise ValueError(
            f"cannot infer the archive format: {destination!r} has no file extension"
        )

    # normpath drops a trailing separator so dirname/basename split correctly.
    source = os.path.normpath(source)
    # A bare name has an empty dirname; shutil needs a real directory there.
    root_dir = os.path.dirname(source) or os.curdir
    base_dir = os.path.basename(source)

    # Build the archive next to its final location instead of in the CWD.
    # The absolute base name keeps the result independent of root_dir.
    created = shutil.make_archive(os.path.abspath(stem), archive_format, root_dir, base_dir)
    if os.path.abspath(created) != os.path.abspath(destination):
        # Formats whose file suffix differs from their name land elsewhere.
        shutil.move(created, destination)
    return destination

def pipeline(paper, section, save_to_path, model, openai_key=None):
    """
    The main pipeline of generating a section.
        1. Generate prompts.
        2. Get responses from AI assistant.
        3. Extract the section text.
        4. Save the text to .tex file.

    :param paper: dict describing the paper; the generated text is stored in
        ``paper["body"][section]``.
    :param section: section name; also used as the .tex file name. The value
        "abstract" is wrapped in an abstract environment, any other value gets
        an upper-cased ``\\section`` heading.
    :param save_to_path: prefix the file name is appended to by plain string
        concatenation, so callers pass it with a trailing separator.
    :param model: model identifier forwarded to ``get_responses``.
    :param openai_key: accepted for interface compatibility; not used here.
    :return usage: the usage object returned by ``get_responses``.
    """
    print(f"Generating {section}...")
    prompts = generate_bg_summary_prompts(paper, section)
    gpt_response, usage = get_responses(prompts, model)
    output = extract_responses(gpt_response)
    paper["body"][section] = output

    tex_file = save_to_path + f"{section}.tex"
    if section == "abstract":
        content = r"\begin{abstract}" + output + r"\end{abstract}"
    else:
        # The backslash is escaped explicitly: "\s" is an invalid escape.
        content = f"\\section{{{section.upper()}}}\n" + output
    # One write with an explicit encoding, so the result does not depend on
    # the platform's default encoding.
    with open(tex_file, "w", encoding="utf-8") as f:
        f.write(content)
    # NOTE(review): presumably a pause to throttle consecutive API calls - confirm.
    time.sleep(5)
    print(f"{section} has been generated. Saved to {tex_file}.")
    return usage



def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4", openai_key=None):
    """Generate the background sections of a survey paper and archive the result.

    Steps, in order:
        1. Copy ``latex_templates/<template>`` to a timestamped folder under
           ``outputs/`` and set the title to "A Survey on <title>".
        2. Ask the model for keywords, collect papers for them and write the
           references to ``ref.bib`` in that folder.
        3. Generate the "introduction", "related works" and "backgrounds"
           sections via ``pipeline``; a section that fails is reported and
           skipped, the others are still generated.
        4. Pack the output folder with ``make_archive``.

    :param title: topic of the survey.
    :param description: optional extra description passed to the prompts.
    :param template: name of the sub-folder of ``latex_templates`` to copy.
    :param model: model identifier forwarded to the API helpers.
    :param openai_key: accepted for interface compatibility; not used here.
    :return: the value returned by ``make_archive(destination_folder, "output.zip")``.
    """
    paper = {}
    paper_body = {}

    # Create a copy in the outputs folder. The name has one-second resolution
    # and copytree refuses to overwrite an existing destination folder.
    now = datetime.datetime.now()
    target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
    source_folder = f"latex_templates/{template}"
    destination_folder = f"outputs/{target_name}"
    shutil.copytree(source_folder, destination_folder)

    bibtex_path = destination_folder + "/ref.bib"
    save_to_path = destination_folder +"/"
    replace_title(save_to_path, "A Survey on " + title)
    # NOTE(review): basicConfig is a no-op once the root logger has handlers,
    # so a second call in the same process keeps logging to the first file.
    logging.basicConfig( level=logging.INFO, filename=save_to_path+"generation.log")

    # Generate keywords and references
    print("Initialize the paper information ...")
    prompts = generate_bg_keywords_prompts(title, description)
    gpt_response, usage = get_responses(prompts, model)
    keywords = extract_keywords(gpt_response)
    log_usage(usage, "keywords")

    ref = References(load_papers = "")
    ref.collect_papers(keywords, method="arxiv")
    all_paper_ids = ref.to_bibtex(bibtex_path) #todo: this will used to check if all citations are in this list

    print(f"The paper information has been initialized. References are saved to {bibtex_path}.")

    paper["title"] = title
    paper["description"] = description
    paper["references"] = ref.to_prompts() # to_prompts(top_papers)
    paper["body"] = paper_body
    paper["bibtex"] = bibtex_path

    for section in ["introduction", "related works", "backgrounds"]:
        try:
            usage = pipeline(paper, section, save_to_path, model=model)
            log_usage(usage, section)
        except Exception as e:
            # Best effort: one failed section must not abort the others, but
            # the traceback is recorded in the log so the failure is traceable.
            print(f"Failed to generate {section} due to the error: {e}")
            logging.exception("Failed to generate %s", section)
    print(f"The paper {title} has been generated. Saved to {save_to_path}.")
    return make_archive(destination_folder, "output.zip")


def fake_generate_backgrounds(title, description, openai_key=None):
    """
    Stand-in for the real generation, used to test the whole pipeline
    without calling OpenAI API.

    Archives "sample-output.pdf" under a name derived from the hash of
    ``title`` and ``description`` and returns what ``make_archive`` returns.
    ``openai_key`` is accepted but not used.
    """
    archive_name = f"{hash_name(title, description)}.zip"
    return make_archive("sample-output.pdf", archive_name)


if __name__ == "__main__":
    # Manual run: generate a survey on a fixed topic with the "Summary" template.
    demo_run = {
        "title": "Reinforcement Learning",
        "description": "",
        "template": "Summary",
        "model": "gpt-4",  # alternative: "gpt-3.5-turbo"
    }
    generate_backgrounds(**demo_run)