Spaces:

auto-academic
/

auto-draft

Runtime error

sc_ma committed on Apr 30, 2023

Commit

1b82d4c

1 Parent(s): 1179bb0

Add functions to support cloud storage cache.

Add functions to flatten the latex file (for future polishing).
Update README.md to list additional code that is under a different license.

Files changed (6) hide show

README.md +4 -1
app.py +60 -20
auto_backgrounds.py +22 -2
latex-flatten.py +50 -0
utils/gpt_interaction.py +7 -1
utils/storage.py +50 -0

README.md CHANGED Viewed

@@ -44,5 +44,8 @@ Page 1            |  Page 2
 :-------------------------:|:-------------------------:
 ![](assets/page1.png "Page-1") |  ![](assets/page2.png "Page-2")

 :-------------------------:|:-------------------------:
 ![](assets/page1.png "Page-1") |  ![](assets/page2.png "Page-2")
+# License
+This project is licensed under the MIT License.
+Some parts of the code are under different licenses, as listed below:
+* `latex-flatten.py`: Licensed under the Unlicense. Original source: [rekka/latex-flatten](https://github.com/rekka/latex-flatten).

app.py CHANGED Viewed

@@ -1,34 +1,65 @@
 import gradio as gr
-import openai
-from auto_backgrounds import generate_backgrounds
-# todo: 　5. Add more functions in this demo.
 def clear_inputs(text1, text2):
-    return ("", "")
-def wrapped_generate_backgrounds(title, description):
-    if title == "Deep Reinforcement Learning":
-        return "output.zip"
     else:
-        return generate_backgrounds(title, description)
 with gr.Blocks() as demo:
     gr.Markdown('''
     # Auto-Draft: 文献整理辅助工具-限量免费使用
-    本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_backgrounds功能的测试。通过输入一个领域的名称（比如Deep Reinforcement Learning)，
-    即可自动对这个领域的相关文献进行归纳总结.
-    生成一篇论文，需要使用我GPT4的API，大概每篇15000 Tokens(大约0.5到0.8美元).
-    我为大家提供了30刀的额度上限，希望大家有明确需求再使用. 如果有更多需求，建议本地部署, 使用自己的API KEY!
-    ***2023-04-26 Update***: 我本月的余额用完了, 感谢乐乐老师帮忙宣传, 也感觉大家的体验和反馈! 我会按照大家的意见对功能进行改进. 下个月开始仅会在Huggingface
-    的Organization里提供免费的试用, 欢迎有兴趣的同学通过下面的链接加入!
-    [https://huggingface.co/organizations/auto-academic/share/HPjgazDSlkwLNCWKiAiZoYtXaJIatkWDYM](https://huggingface.co/organizations/auto-academic/share/HPjgazDSlkwLNCWKiAiZoYtXaJIatkWDYM)
     ## 用法
@@ -36,6 +67,7 @@ with gr.Blocks() as demo:
     ''')
     with gr.Row():
         with gr.Column():
             title = gr.Textbox(value="Deep Reinforcement Learning", lines=1, max_lines=1, label="Title")
             description = gr.Textbox(lines=5, label="Description (Optional)")
@@ -43,10 +75,18 @@ with gr.Blocks() as demo:
                 clear_button = gr.Button("Clear")
                 submit_button = gr.Button("Submit")
         with gr.Column():
-            file_output = gr.File()
     clear_button.click(fn=clear_inputs, inputs=[title, description], outputs=[title, description])
-    submit_button.click(fn=wrapped_generate_backgrounds, inputs=[title, description], outputs=file_output)
 demo.queue(concurrency_count=1, max_size=5, api_open=False)
-demo.launch()

 import gradio as gr
+import os
+from auto_backgrounds import generate_backgrounds, fake_generate_backgrounds
# Credentials are taken from the environment; each one is None when unset.
openai_key = os.getenv("OPENAI_API_KEY")
access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')

# The cloud storage cache is usable only when BOTH AWS credentials are present.
IS_CACHE_AVAILABLE = access_key_id is not None and secret_access_key is not None
if not IS_CACHE_AVAILABLE:
    print("Access keys are not provided. Outputs cannot be saved to AWS Cloud Storage.")

# todo: check if this key is available or not
# (only the presence of the variable is checked here, not its validity)
IS_OPENAI_API_KEY_AVAILABLE = openai_key is not None
if not IS_OPENAI_API_KEY_AVAILABLE:
    print("OPENAI_API_KEY is not found in environment variables. The output may not be generated.")
 def clear_inputs(text1, text2):
+    return "", ""
def wrapped_generate_backgrounds(title, description, openai_key = None, cache_mode = True):
    """Generate the archive for a topic, reusing the cloud cache when possible.

    With caching active, the archive is looked up in cloud storage under the
    key ``hash_name(title, description) + ".zip"``:
      * cache hit  -> the object is downloaded and its local file name returned;
      * cache miss -> a new archive is generated, uploaded under the hashed
        key, and the path of the generated archive returned.
    Without caching the archive is simply generated and its path returned.

    Args:
        title: Title/topic to generate the backgrounds for.
        description: Optional free-text description of the topic.
        openai_key: OpenAI API key forwarded to `generate_backgrounds`.
        cache_mode: If True, use the cloud storage cache (only honoured when
            the AWS credentials were found at start-up).

    Returns:
        Path of the zip archive.
    """
    # A blank key field may arrive as an empty string; treat it as "not provided"
    # so that downstream code falls back to its own configured key.
    openai_key = openai_key or None

    # `utils.storage` reads the AWS credentials from os.environ when it is
    # imported, so it must only be imported when those credentials exist.
    if cache_mode and IS_CACHE_AVAILABLE:
        from utils.storage import list_all_files, hash_name, download_file, upload_file
        # check if "title"+"description" have been generated before
        file_name = hash_name(title, description) + ".zip"
        if file_name in list_all_files():
            # download from the cloud storage, return it
            download_file(file_name)
            return file_name
        # Not cached yet: generate the result.
        # (`fake_generate_backgrounds(title, description, openai_key)` can be
        # substituted here to exercise the flow without calling the OpenAI API.)
        # `openai_key` must be passed by keyword: the third positional
        # parameter of `generate_backgrounds` is `template`.
        output = generate_backgrounds(title, description, openai_key=openai_key) #todo: change the output of this function to hashed title
        # The generated archive lives at `output`; store it under the hashed key.
        upload_file(output, target_name=file_name)
        return output

    output = generate_backgrounds(title, description, openai_key=openai_key) #todo: change the output of this function to hashed title
    return output
 with gr.Blocks() as demo:
     gr.Markdown('''
     # Auto-Draft: 文献整理辅助工具-限量免费使用
+    本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_backgrounds功能的测试。通过输入一个领域的名称（比如Deep Reinforcement Learning)，即可自动对这个领域的相关文献进行归纳总结.
+    ***2023-04-30 Update***: 如果有更多想法和建议欢迎加入群里交流, 群号: ***249738228***.
+    ***2023-04-26 Update***: 我本月的余额用完了, 感谢乐乐老师帮忙宣传, 也感觉大家的体验和反馈! 我会按照大家的意见对功能进行改进. 下个月会把Space的访问权限限制在Huggingface的Organization里, 欢迎有兴趣的同学通过下面的链接加入! [AUTO-ACADEMIC](https://huggingface.co/organizations/auto-academic/share/HPjgazDSlkwLNCWKiAiZoYtXaJIatkWDYM)
     ## 用法
     ''')
     with gr.Row():
         with gr.Column():
+            key =  gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key", visible=not IS_OPENAI_API_KEY_AVAILABLE)
             title = gr.Textbox(value="Deep Reinforcement Learning", lines=1, max_lines=1, label="Title")
             description = gr.Textbox(lines=5, label="Description (Optional)")
                 clear_button = gr.Button("Clear")
                 submit_button = gr.Button("Submit")
         with gr.Column():
+            style_mapping = {True: "color:white;background-color:green", False: "color:white;background-color:red"}
+            availablity_mapping = {True: "AVAILABLE", False: "NOT AVAILABLE"}
+            gr.Markdown(f'''## Huggingface Space Status
+             当`OpenAI API`显示AVAILABLE的时候这个Space可以直接使用.
+             当`OpenAI API`显示UNAVAILABLE的时候这个Space可以通过在左侧输入OPENAI KEY来使用.
+            `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availablity_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>.  `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availablity_mapping[IS_CACHE_AVAILABLE]}</span>.''')
+            file_output = gr.File(label="Output")
     clear_button.click(fn=clear_inputs, inputs=[title, description], outputs=[title, description])
+    submit_button.click(fn=wrapped_generate_backgrounds, inputs=[title, description, key], outputs=file_output)
 demo.queue(concurrency_count=1, max_size=5, api_open=False)
+demo.launch()

auto_backgrounds.py CHANGED Viewed

@@ -13,6 +13,17 @@ TOTAL_PROMPTS_TOKENS = 0
 TOTAL_COMPLETION_TOKENS = 0
 def log_usage(usage, generating_target, print_out=True):
     global TOTAL_TOKENS
     global TOTAL_PROMPTS_TOKENS
@@ -42,7 +53,7 @@ def make_archive(source, destination):
     shutil.move('%s.%s'%(name,format), destination)
     return destination
-def pipeline(paper, section, save_to_path, model):
     """
     The main pipeline of generating a section.
         1. Generate prompts.
@@ -75,7 +86,7 @@ def pipeline(paper, section, save_to_path, model):
-def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
     paper = {}
     paper_body = {}
@@ -120,6 +131,15 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
     # shutil.make_archive("output.zip", 'zip', save_to_path)
     return make_archive(destination_folder, "output.zip")
 if __name__ == "__main__":
     title = "Reinforcement Learning"
     description = ""

 TOTAL_COMPLETION_TOKENS = 0
def hash_name(title, description):
    """Return a stable MD5 hex digest identifying a (title, description) pair.

    The two strings are concatenated and lower-cased before hashing, so the
    same title and description always give the same value regardless of
    letter case. A missing part (``None``) is treated as an empty string.

    Args:
        title: Title text (or None).
        description: Description text (or None).

    Returns:
        32-character hexadecimal MD5 digest of the UTF-8 encoded name.
    """
    # MD5 is used only to derive a deterministic name, not for security.
    name = ((title or "") + (description or "")).lower()
    return hashlib.md5(name.encode('utf-8')).hexdigest()
 def log_usage(usage, generating_target, print_out=True):
     global TOTAL_TOKENS
     global TOTAL_PROMPTS_TOKENS
     shutil.move('%s.%s'%(name,format), destination)
     return destination
+def pipeline(paper, section, save_to_path, model, openai_key=None):
     """
     The main pipeline of generating a section.
         1. Generate prompts.
+def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4", openai_key=None):
     paper = {}
     paper_body = {}
     # shutil.make_archive("output.zip", 'zip', save_to_path)
     return make_archive(destination_folder, "output.zip")
def fake_generate_backgrounds(title, description, openai_key = None):
    """
    Stand-in for the real generator, used to test the whole pipeline without
    calling the OpenAI API.

    Archives "sample-output.pdf" via `make_archive` into a zip named after the
    hash of `title` and `description`, and returns what `make_archive` returns.
    `openai_key` is accepted for signature compatibility and is not used.
    """
    archive_name = "{}.zip".format(hash_name(title, description))
    return make_archive("sample-output.pdf", archive_name)
 if __name__ == "__main__":
     title = "Reinforcement Learning"
     description = ""

latex-flatten.py ADDED Viewed

	@@ -0,0 +1,50 @@

#!/usr/bin/env python
# This script is taken from: https://github.com/rekka/latex-flatten
# A simple script for flattening LaTeX files by inlining included files.
#
#   - Supports `\include` and `\input` commands.
#   - Automatically adds extension `.tex` if the file does not have an extension.
#   - Handles multiple include commands per line, comments.
#   - Does not flatten recursively.
import re
import sys

# Matches `\input{name}` / `\include{name}`.
# Group 1 is the command name, group 2 the referenced file name.
_INCLUDE_RE = re.compile(r'\\(input|include)\{([^}]+)\}')


def flatten_file(main_name, output_name):
    """Copy `main_name` to `output_name`, inlining `\\input`/`\\include` files.

    Each include command found in the non-comment part of a line is replaced
    by the content of the referenced file, wrapped in `% BEGIN` / `% END`
    marker comments. Lines without an include command are copied unchanged.
    Included files are NOT processed recursively.

    Args:
        main_name: Path of the main LaTeX file to read.
        output_name: Path of the flattened file to write.

    Raises:
        IOError/OSError: if the main file, the output file or an included
            file cannot be opened. Included files are opened exactly as
            named in the command (plus `.tex` when the name has no dot).
    """
    with open(main_name, 'r') as main, open(output_name, 'w') as output:
        for line in main:
            # Everything from the FIRST '%' onwards is the comment. Splitting
            # only once keeps later '%' characters and the trailing newline.
            # Note: an escaped `\%` is still treated as a comment start.
            tex, percent, rest = line.partition('%')
            comment = percent + rest

            matches = list(_INCLUDE_RE.finditer(tex))
            if not matches:
                output.write(line)
                continue

            pos = 0
            for match in matches:
                # Text between the previous include command and this one.
                before = tex[pos:match.start()]
                pos = match.end()
                cmd_name = match.group(1)
                include_name = match.group(2)
                if '.' not in include_name:
                    include_name = include_name + '.tex'
                if before.strip():
                    output.write(before + '\n')
                output.write('% BEGIN \\' + cmd_name + '{' + include_name + '}\n')
                with open(include_name, 'r') as include:
                    output.write(include.read())
                output.write('% END \\' + cmd_name + '{' + include_name + '}\n')

            # Whatever follows the last include command, plus the comment.
            tail = tex[pos:] + comment
            if tail.strip():
                output.write(tail)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit('USAGE: %s main.tex output.tex' % sys.argv[0])
    flatten_file(sys.argv[1], sys.argv[2])

utils/gpt_interaction.py CHANGED Viewed

@@ -5,6 +5,7 @@ import json
 import logging
 log = logging.getLogger(__name__)
 openai.api_key = os.environ['OPENAI_API_KEY']
 def extract_responses(assistant_message):
@@ -54,7 +55,12 @@ def extract_json(assistant_message, default_output=None):
     return dict.keys()
-def get_responses(user_message, model="gpt-4", temperature=0.4):
     conversation_history = [
         {"role": "system", "content": "You are an assistant in writing machine learning papers."}
     ]

 import logging
 log = logging.getLogger(__name__)
+# todo: 将api_key通过函数传入; 需要改很多地方
 openai.api_key = os.environ['OPENAI_API_KEY']
 def extract_responses(assistant_message):
     return dict.keys()
+def get_responses(user_message, model="gpt-4", temperature=0.4, openai_key = None):
+    if openai.api_key is None and openai_key is None:
+        raise ValueError("OpenAI API key must be provided.")
+    if openai_key is not None:
+        openai.api_key = openai_key
     conversation_history = [
         {"role": "system", "content": "You are an assistant in writing machine learning papers."}
     ]

utils/storage.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+import boto3
+import hashlib
# Both AWS credentials are required: a missing variable raises KeyError
# as soon as this module is imported.
access_key_id = os.environ['AWS_ACCESS_KEY_ID']
secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']

# Name of the S3 bucket used by this module.
bucket_name = "hf-storage"

# One shared session / resource / bucket handle, created at import time.
session = boto3.Session(aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)
s3 = session.resource('s3')
bucket = s3.Bucket(bucket_name)
def upload_file(file_name, target_name=None):
    ''' Upload the local file `file_name` to the bucket (best effort).

    file_name (str) – Path of the local file to upload.
    target_name (str) – Key to store the file under; defaults to `file_name`.

    A failure is reported on stdout together with its cause and is not
    re-raised. Returns None in both cases.
    '''
    if target_name is None:
        target_name = file_name
    try:
        s3.meta.client.upload_file(Filename=file_name, Bucket=bucket_name, Key=target_name)
    except Exception as e:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print(f"Uploading failed! {e}")
    else:
        print(f"The file {file_name} has been uploaded!")
def list_all_files():
    ''' Return a list with the key of every object in the bucket. '''
    keys = []
    for item in bucket.objects.all():
        keys.append(item.key)
    return keys
def download_file(file_name):
    ''' Download `file_name` from the bucket (best effort). todo:check existence before downloading!

    file_name (str) – Used both as the key to download from the bucket and
    as the local path the file is written to.

    A failure is reported on stdout together with its cause and is not
    re-raised. Returns None in both cases.
    '''
    try:
        s3.meta.client.download_file(Bucket=bucket_name, Key=file_name, Filename=file_name)
    except Exception as e:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print(f"Downloading failed! {e}")
    else:
        print(f"The file {file_name} has been downloaded!")
def hash_name(title, description):
    '''
    Return a stable MD5 hex digest identifying a (title, description) pair.

    The two strings are concatenated and lower-cased before hashing, so for
    the same title and description it returns the same value regardless of
    letter case. A missing part (None) is treated as an empty string.

    title (str) – Title text (or None).
    description (str) – Description text (or None).
    Returns the 32-character hexadecimal digest of the UTF-8 encoded name.
    '''
    # MD5 is used only to derive a deterministic name, not for security.
    name = ((title or "") + (description or "")).lower()
    return hashlib.md5(name.encode('utf-8')).hexdigest()