shaocongma committed on
Commit
c42190b
1 Parent(s): acf8a73

Bug fix: error when abstract is None.

Browse files
Files changed (4) hide show
  1. api_wrapper.py +13 -5
  2. auto_backgrounds.py +22 -10
  3. utils/references.py +6 -2
  4. worker.py +172 -0
api_wrapper.py CHANGED
@@ -12,18 +12,26 @@ todo:
12
  If `generator_wrapper` returns nothing or Timeout, or raise any error:
13
  Change Task status from Running to Failed.
14
  '''
 
15
 
16
  from auto_backgrounds import generate_draft
17
- import json
 
18
 
19
 
20
- GENERATOR_MAPPING = {"draft": generate_draft}
 
21
 
22
  def generator_wrapper(path_to_config_json):
23
  # Read configuration file and call corresponding function
24
  with open(path_to_config_json, "r", encoding='utf-8') as f:
25
  config = json.load(f)
26
-
27
- generator = GENERATOR_MAPPING.get(config["generator"])
 
28
  if generator is None:
29
- pass
 
 
 
 
 
12
  If `generator_wrapper` returns nothing or Timeout, or raise any error:
13
  Change Task status from Running to Failed.
14
  '''
15
+ import os.path
16
 
17
  from auto_backgrounds import generate_draft
18
+ import json, time
19
+ from utils.file_operations import make_archive
20
 
21
 
22
# GENERATOR_MAPPING = {"draft": generate_draft}
# NOTE(review): mapped to None on purpose so generator_wrapper falls through to
# its fake-ZIP stub path; restore generate_draft to re-enable real generation.
GENERATOR_MAPPING = {"draft": None}
24
 
25
def generator_wrapper(path_to_config_json):
    """Read a task configuration JSON and produce a result ZIP archive.

    Currently a stub: the real generator lookup is commented out, so every
    task takes the fake path below regardless of ``config["generator"]``.

    Args:
        path_to_config_json (str): Path to the task's JSON configuration file.

    Returns:
        The return value of ``make_archive`` — presumably the ZIP path
        (TODO confirm against utils.file_operations).
    """
    # Read configuration file and call corresponding function
    with open(path_to_config_json, "r", encoding='utf-8') as f:
        config = json.load(f)
    print("Configuration:", config)
    # generator = GENERATOR_MAPPING.get(config["generator"])
    generator = None  # hard-coded so the stub branch below always runs
    if generator is None:
        # generate a fake ZIP file and upload
        time.sleep(150)  # simulate generation time — TODO remove for production
        zip_path = os.path.splitext(path_to_config_json)[0]+".zip"
        return make_archive(path_to_config_json, zip_path)
auto_backgrounds.py CHANGED
@@ -3,7 +3,6 @@ from utils.references import References
3
  from utils.file_operations import hash_name, make_archive, copy_templates
4
  from utils.tex_processing import create_copies
5
  from section_generator import section_generation_bg, keywords_generation, figures_generation, section_generation
6
- from references_generator import generate_top_k_references
7
  import logging
8
  import time
9
 
@@ -26,12 +25,14 @@ def log_usage(usage, generating_target, print_out=True):
26
  TOTAL_PROMPTS_TOKENS += prompts_tokens
27
  TOTAL_COMPLETION_TOKENS += completion_tokens
28
 
29
- message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
 
30
  f"{TOTAL_TOKENS} tokens have been used in total.\n\n"
31
  if print_out:
32
  print(message)
33
  logging.info(message)
34
 
 
35
  def _generation_setup(title, description="", template="ICLR2022", tldr=False,
36
  max_kw_refs=10, max_num_refs=50, bib_refs=None, max_tokens=2048):
37
  """
@@ -44,9 +45,12 @@ def _generation_setup(title, description="", template="ICLR2022", tldr=False,
44
  title (str): The title of the paper.
45
  description (str, optional): A short description or abstract for the paper. Defaults to an empty string.
46
  template (str, optional): The template to be used for paper generation. Defaults to "ICLR2022".
47
- tldr (bool, optional): A flag indicating whether a TL;DR (Too Long; Didn't Read) summary should be generated for the collected papers. Defaults to False.
48
- max_kw_refs (int, optional): The maximum number of references that can be associated with each keyword. Defaults to 10.
49
- max_num_refs (int, optional): The maximum number of references that can be included in the paper. Defaults to 50.
 
 
 
50
  bib_refs (list, optional): A list of pre-existing references in BibTeX format. Defaults to None.
51
 
52
  Returns:
@@ -111,21 +115,29 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
111
  def generate_draft(title, description="", template="ICLR2022",
112
  tldr=True, max_kw_refs=10, max_num_refs=30, sections=None, bib_refs=None, model="gpt-4"):
113
  # pre-processing `sections` parameter;
 
 
114
  print("================PRE-PROCESSING================")
115
  if sections is None:
116
  sections = ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]
117
 
118
  # todo: add more parameters; select which section to generate; select maximum refs.
119
- paper, destination_folder, _ = _generation_setup(title, description, template, tldr, max_kw_refs, max_num_refs, bib_refs)
 
 
 
 
120
 
121
  # main components
 
122
  for section in sections:
123
- print(f"================Generate {section}================")
124
  max_attempts = 4
125
  attempts_count = 0
126
  while attempts_count < max_attempts:
127
  try:
128
  usage = section_generation(paper, section, destination_folder, model=model)
 
129
  log_usage(usage, section)
130
  break
131
  except Exception as e:
@@ -153,7 +165,7 @@ if __name__ == "__main__":
153
  import openai
154
  openai.api_key = os.getenv("OPENAI_API_KEY")
155
 
156
- title = "Using interpretable boosting algorithms for modeling environmental and agricultural data"
157
- description = ""
158
- output = generate_draft(title, description, tldr=True, max_kw_refs=10)
159
  print(output)
 
3
  from utils.file_operations import hash_name, make_archive, copy_templates
4
  from utils.tex_processing import create_copies
5
  from section_generator import section_generation_bg, keywords_generation, figures_generation, section_generation
 
6
  import logging
7
  import time
8
 
 
25
  TOTAL_PROMPTS_TOKENS += prompts_tokens
26
  TOTAL_COMPLETION_TOKENS += completion_tokens
27
 
28
+ message = f"For generating {generating_target}, {total_tokens} tokens have been used " \
29
+ f"({prompts_tokens} for prompts; {completion_tokens} for completion). " \
30
  f"{TOTAL_TOKENS} tokens have been used in total.\n\n"
31
  if print_out:
32
  print(message)
33
  logging.info(message)
34
 
35
+
36
  def _generation_setup(title, description="", template="ICLR2022", tldr=False,
37
  max_kw_refs=10, max_num_refs=50, bib_refs=None, max_tokens=2048):
38
  """
 
45
  title (str): The title of the paper.
46
  description (str, optional): A short description or abstract for the paper. Defaults to an empty string.
47
  template (str, optional): The template to be used for paper generation. Defaults to "ICLR2022".
48
+ tldr (bool, optional): A flag indicating whether a TL;DR (Too Long; Didn't Read) summary should be used
49
+ for the collected papers. Defaults to False.
50
+ max_kw_refs (int, optional): The maximum number of references that can be associated with each keyword.
51
+ Defaults to 10.
52
+ max_num_refs (int, optional): The maximum number of references that can be included in the paper.
53
+ Defaults to 50.
54
  bib_refs (list, optional): A list of pre-existing references in BibTeX format. Defaults to None.
55
 
56
  Returns:
 
115
  def generate_draft(title, description="", template="ICLR2022",
116
  tldr=True, max_kw_refs=10, max_num_refs=30, sections=None, bib_refs=None, model="gpt-4"):
117
  # pre-processing `sections` parameter;
118
+ print("================START================")
119
+ print(f"Generating {title}.")
120
  print("================PRE-PROCESSING================")
121
  if sections is None:
122
  sections = ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]
123
 
124
  # todo: add more parameters; select which section to generate; select maximum refs.
125
+ if model == "gpt-4":
126
+ max_tokens = 4096
127
+ else:
128
+ max_tokens = 2048
129
+ paper, destination_folder, _ = _generation_setup(title, description, template, tldr, max_kw_refs, max_num_refs, bib_refs, max_tokens=max_tokens)
130
 
131
  # main components
132
+ print(f"================PROCESSING================")
133
  for section in sections:
134
+ print(f"Generate {section} part...")
135
  max_attempts = 4
136
  attempts_count = 0
137
  while attempts_count < max_attempts:
138
  try:
139
  usage = section_generation(paper, section, destination_folder, model=model)
140
+ print(f"{section} part has been generated. ")
141
  log_usage(usage, section)
142
  break
143
  except Exception as e:
 
165
  import openai
166
  openai.api_key = os.getenv("OPENAI_API_KEY")
167
 
168
+ target_title = "Using interpretable boosting algorithms for modeling environmental and agricultural data"
169
+ target_description = ""
170
+ output = generate_draft(target_title, target_description, tldr=True, max_kw_refs=10)
171
  print(output)
utils/references.py CHANGED
@@ -334,8 +334,12 @@ class References:
334
  prompts = {}
335
  tokens = 0
336
  for paper in result:
337
- prompts[paper["paper_id"]] = paper["abstract"]
338
- tokens += tiktoken_len(paper["abstract"])
 
 
 
 
339
  if tokens >= max_tokens:
340
  break
341
  return prompts
 
334
  prompts = {}
335
  tokens = 0
336
  for paper in result:
337
+ abstract = paper.get("abstract")
338
+ if abstract is not None and isinstance(abstract, str):
339
+ prompts[paper["paper_id"]] = paper["abstract"]
340
+ tokens += tiktoken_len(paper["abstract"])
341
+ else:
342
+ prompts[paper["paper_id"]] = " "
343
  if tokens >= max_tokens:
344
  break
345
  return prompts
worker.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ This script is only used for service-side host.
3
+ '''
4
+ import boto3
5
+ import os, time
6
+ from api_wrapper import generator_wrapper
7
+ from sqlalchemy import create_engine, Table, MetaData, update, select
8
+ from sqlalchemy.orm import sessionmaker
9
+ from sqlalchemy import inspect
10
+
11
# Service configuration — all values come from the environment; any of them
# may be None if the variable is unset (create_engine below would then fail).
QUEUE_URL = os.getenv('QUEUE_URL')
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
BUCKET_NAME = os.getenv('BUCKET_NAME')
DB_STRING = os.getenv('DATABASE_STRING')

# Create engine
# NOTE: engine/session factory are built at import time, so importing this
# module requires DATABASE_STRING to be set.
ENGINE = create_engine(DB_STRING)
SESSION = sessionmaker(bind=ENGINE)
20
+
21
+
22
+ #######################################################################################################################
23
+ # Amazon SQS Handler
24
+ #######################################################################################################################
25
def get_sqs_client():
    """Build a boto3 SQS client using the module-level AWS credentials."""
    return boto3.client(
        'sqs',
        region_name="us-east-2",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
30
+
31
+
32
def receive_message():
    """Poll the task queue once.

    Returns:
        A ``(response, receipt_handle)`` pair: the raw SQS response and the
        receipt handle of the first message, or ``None`` when no message
        was returned.
    """
    response = get_sqs_client().receive_message(QueueUrl=QUEUE_URL)
    messages = response.get('Messages')
    handle = messages[0]['ReceiptHandle'] if messages is not None else None
    return response, handle
40
+
41
+
42
def delete_message(receipt_handle):
    """Acknowledge (remove) a queue message identified by its receipt handle."""
    return get_sqs_client().delete_message(
        QueueUrl=QUEUE_URL, ReceiptHandle=receipt_handle
    )
46
+
47
+
48
+ #######################################################################################################################
49
+ # AWS S3 Handler
50
+ #######################################################################################################################
51
def get_s3_client():
    """Create an S3 resource plus a handle to the working bucket.

    Returns:
        ``(s3, bucket)``: the boto3 S3 resource and the Bucket named by
        ``BUCKET_NAME``.
    """
    # Consistency fix: reuse the module-level credentials (as get_sqs_client
    # does) instead of re-reading the environment variables here.
    session = boto3.Session(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    s3 = session.resource('s3')
    bucket = s3.Bucket(BUCKET_NAME)
    return s3, bucket
61
+
62
+
63
def upload_file(file_name, target_name=None):
    """Upload a local file to the S3 bucket.

    Args:
        file_name: Local path of the file to upload.
        target_name: S3 key to store it under; defaults to ``file_name``.
    """
    if target_name is None:
        target_name = file_name
    s3, _ = get_s3_client()
    s3.meta.client.upload_file(Filename=file_name, Bucket=BUCKET_NAME, Key=target_name)
    print(f"The file {file_name} has been uploaded!")
70
+
71
+
72
def download_file(file_name):
    """Download the S3 object ``file_name`` into the current directory.

    The local target is the basename of the key; the bucket is the
    module-level ``BUCKET_NAME``.
    """
    local_name = os.path.basename(file_name)
    s3, _ = get_s3_client()
    s3.meta.client.download_file(Bucket=BUCKET_NAME, Key=file_name, Filename=local_name)
    print(f"The file {file_name} has been downloaded!")
81
+
82
+
83
+ #######################################################################################################################
84
+ # AWS SQL Handler
85
+ #######################################################################################################################
86
def modify_status(task_id, new_status):
    """Set the ``status`` column of task ``task_id`` in the ``task`` table.

    Status codes: 0 - pending, 1 - running, 2 - completed, 3 - failed.
    Silently does nothing if the task id does not exist.

    Args:
        task_id: Primary identifier of the task row.
        new_status (int): New status code to write.
    """
    # Fixes: the original created (and closed) an ORM session it never used —
    # the function works entirely on a Core connection — and carried stale
    # copy-pasted comments about users/passwords.
    metadata = MetaData()
    task_table = Table('task', metadata, autoload_with=ENGINE)
    select_stmt = select(task_table).where(task_table.c.task_id == task_id)
    with ENGINE.connect() as connection:
        # Verify the task exists before issuing the update.
        task_data = connection.execute(select_stmt).fetchone()
        if task_data:
            update_stmt = (
                update(task_table)
                .where(task_table.c.task_id == task_id)
                .values(status=new_status)
            )
            connection.execute(update_stmt)
            connection.commit()
112
+
113
+ #######################################################################################################################
114
+ # Pipline
115
+ #######################################################################################################################
116
def pipeline(message_count=0, query_interval=10):
    """Run one iteration of the worker loop.

    Polls SQS for a task, downloads its configuration from S3, runs the
    generator, uploads the resulting ZIP back to S3, and keeps the task's
    DB status in sync.

    Status codes: 0 - pending (default), 1 - running, 2 - completed,
    3 - failed.

    Args:
        message_count: Counter used only in log output.
        query_interval: Seconds to sleep when the queue is empty.
    """
    # Query a message from SQS
    msg, handle = receive_message()
    if handle is None:
        print("No message in SQS. ")
        time.sleep(query_interval)
    else:
        print("===============================================================================================")
        print(f"MESSAGE COUNT: {message_count}")
        print("===============================================================================================")
        config_s3_path = msg['Messages'][0]['Body']
        config_s3_dir = os.path.dirname(config_s3_path)
        config_local_path = os.path.basename(config_s3_path)
        task_id, _ = os.path.splitext(config_local_path)

        print("Initializing ...")
        print("Configuration file on S3: ", config_s3_path)
        print("Configuration file on S3 (Directory): ", config_s3_dir)
        print("Local file path: ", config_local_path)
        print("Task id: ", task_id)

        print(f"Success in receiving message: {msg}")
        print(f"Configuration file path: {config_s3_path}")

        # Process the downloaded configuration file
        download_file(config_s3_path)
        modify_status(task_id, 1)  # mark running
        delete_message(handle)
        print(f"Success in the initialization. Message deleted.")

        print("Running ...")
        # Fix: the original left this try commented out, so a failing generator
        # left the task stuck in "running" forever. Per the api_wrapper
        # contract, any error must flip the task status to failed.
        try:
            zip_path = generator_wrapper(config_local_path)
            # Upload the generated file to S3
            upload_to = os.path.join(config_s3_dir, zip_path).replace("\\", "/")

            print("Local file path (ZIP): ", zip_path)
            print("Upload to S3: ", upload_to)
            upload_file(zip_path, upload_to)
        except Exception as e:
            modify_status(task_id, 3)  # mark failed
            print(f"Task failed: {e}")
        else:
            modify_status(task_id, 2)  # mark completed
            print(f"Success in generating the paper.")

        # Complete.
        print("Task completed.")
162
+
163
+
164
def initialize_everything():
    """Reset external state (S3 bucket and SQS queue) — not yet implemented."""
    # Clear S3

    # Clear SQS
    pass
169
+
170
+
171
+ if __name__ == "__main__":
172
+ pipeline()