barunsaha committed
Commit 724babe
1 Parent(s): 7b01107

Switch to Mistral-7B-Instruct-v0.1; invoke HF API end-point directly

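For orientation, a minimal, self-contained sketch of the direct Inference API call this commit introduces. The URL, payload fields, and parameter values are taken from the diffs below; the standalone function name and the raise_for_status() error check are illustrative additions, not part of the commit:

import os
import requests

MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1'
API_URL = f'https://api-inference.huggingface.co/models/{MODEL_NAME}'
HEADERS = {'Authorization': f"Bearer {os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')}"}


def query_llm(prompt: str) -> str:
    # POST the prompt to the hosted model; wait_for_model blocks until the model is loaded
    response = requests.post(
        API_URL,
        headers=HEADERS,
        json={
            'inputs': prompt,
            'parameters': {
                'temperature': 0.2,
                'max_new_tokens': 2000,
                'return_full_text': False,
            },
            'options': {'wait_for_model': True, 'use_cache': True},
        },
    )
    response.raise_for_status()  # illustrative: fail loudly on HTTP errors
    # The API returns a list of generated sequences; take the first one
    return response.json()[0]['generated_text'].strip()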
app.py CHANGED
@@ -84,26 +84,26 @@ def get_ai_image_wrapper(text: str) -> str:
     return llm_helper.get_ai_image(text)
 
 
-def get_disk_used_percentage() -> float:
-    """
-    Compute the disk usage.
-
-    :return: Percentage of the disk space currently used
-    """
-
-    total, used, free = shutil.disk_usage(__file__)
-    total = total // GB_CONVERTER
-    used = used // GB_CONVERTER
-    free = free // GB_CONVERTER
-    used_perc = 100.0 * used / total
-
-    logging.debug(f'Total: {total} GB\n'
-                  f'Used: {used} GB\n'
-                  f'Free: {free} GB')
-
-    logging.debug('\n'.join(os.listdir()))
-
-    return used_perc
+# def get_disk_used_percentage() -> float:
+#     """
+#     Compute the disk usage.
+#
+#     :return: Percentage of the disk space currently used
+#     """
+#
+#     total, used, free = shutil.disk_usage(__file__)
+#     total = total // GB_CONVERTER
+#     used = used // GB_CONVERTER
+#     free = free // GB_CONVERTER
+#     used_perc = 100.0 * used / total
+#
+#     logging.debug(f'Total: {total} GB\n'
+#                   f'Used: {used} GB\n'
+#                   f'Free: {free} GB')
+#
+#     logging.debug('\n'.join(os.listdir()))
+#
+#     return used_perc
 
 
 def build_ui():
@@ -115,8 +115,11 @@ def build_ui():
 
     st.title(APP_TEXT['app_name'])
     st.subheader(APP_TEXT['caption'])
-    st.markdown('*Running on GPT-4 at the moment. Image generation has been disabled. '
-                'Will run as long as the community plan supports* :)')
+    st.markdown('Using [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).')
+    st.markdown('*If the JSON is generated or parsed incorrectly, try again later by making minor changes '
+                'to the input text.*')
+    st.markdown('~~*Running on GPT-4 at the moment. Image generation has been disabled. '
+                'Will run as long as the community plan supports* :)~~')
 
     with st.form('my_form'):
         # Topic input
@@ -188,6 +191,8 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
     try:
         # Step 1: Generate the contents in JSON format using an LLM
         json_str = process_slides_contents(topic[:target_length], progress_bar)
+        logging.debug(f'{topic[:target_length]=}')
+        logging.debug(f'{json_str=}')
 
         # Step 2: Generate the slide deck based on the template specified
         if len(json_str) > 0:
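The two new logging calls use Python 3.8's self-documenting f-string form: f'{expr=}' expands to the expression text followed by its repr, so no label has to be written by hand. For example:

json_str = '{"title": "Demo"}'
print(f'{json_str=}')  # prints: json_str='{"title": "Demo"}'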
global_config.py CHANGED
@@ -22,11 +22,13 @@ class GlobalConfig:
     CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
     CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
 
-    # LLM_MODEL_TEMPERATURE: float = 0.5
+    HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1'  # 'HuggingFaceH4/zephyr-7b-beta'
+    LLM_MODEL_TEMPERATURE: float = 0.2  # Mistral and Zephyr require a slightly higher temperature
     LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
     LLM_MODEL_MAX_OUTPUT_LENGTH: int = 2000
     LLM_MODEL_MAX_INPUT_LENGTH: int = 300
 
+    HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')
     METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY', '')
 
     LOG_LEVEL = 'INFO'
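The new HUGGINGFACEHUB_API_TOKEN field defaults to an empty string, so a misconfigured deployment would only fail at request time with an authorization error. A start-up check along these lines (illustrative only, not part of this commit) would surface the problem earlier:

from global_config import GlobalConfig

if not GlobalConfig.HUGGINGFACEHUB_API_TOKEN:
    raise RuntimeError('HUGGINGFACEHUB_API_TOKEN is not set; export it before starting the app')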
langchain_templates/template_combined.txt CHANGED
@@ -1,4 +1,5 @@
 You are a helpful, intelligent chatbot. Create the slides for a presentation on the given topic. Include main headings for each slide, detailed bullet points for each slide. Add relevant content to each slide.
+If relevant, add one or two examples to illustrate the concept.
 
 
 Topic:
@@ -6,7 +7,6 @@ Topic:
 
 
 Desired JSON output format:
-
 {
     "title": "Presentation Title",
     "slides": [
@@ -16,7 +16,7 @@ Desired JSON output format:
         "First bullet point",
         [
             "Sub-bullet point 1",
-            "Sub-bullet point 1"
+            "Sub-bullet point 2"
         ],
         "Second bullet point"
     ]
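In the desired JSON format above, a nested list inside a slide's bullet list denotes sub-bullet points. A small sketch of walking that structure; the print_deck helper and the 'heading'/'bullet_points' key names are assumptions, since the slide-object keys are not visible in this hunk:

import json


def print_deck(json_str: str) -> None:
    deck = json.loads(json_str)
    print(deck['title'])
    for slide in deck['slides']:
        print(f"- {slide['heading']}")  # assumed key name
        for item in slide['bullet_points']:  # assumed key name
            if isinstance(item, list):
                # A nested list holds sub-bullet points
                for sub_item in item:
                    print(f'    * {sub_item}')
            else:
                print(f'  * {item}')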
llm_helper.py CHANGED
@@ -7,17 +7,20 @@ from langchain.llms import Clarifai
 from global_config import GlobalConfig
 
 
+HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
+HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
+
 logging.basicConfig(
     level=GlobalConfig.LOG_LEVEL,
     format='%(asctime)s - %(message)s',
 )
 
-llm = None
+# llm = None
 
 
 def get_llm(use_gpt: bool) -> Clarifai:
     """
-    Get a large language model.
+    Get a large language model (hosted by Clarifai).
 
     :param use_gpt: True if GPT-3.5 is required; False if Llama 2 is required
     """
@@ -45,28 +48,58 @@ def get_llm(use_gpt: bool) -> Clarifai:
     return _
 
 
+def hf_api_query(payload: dict):
+    """
+    Invoke the HF inference end-point API.
+
+    :param payload: The prompt for the LLM and related parameters
+    :return: The output from the LLM
+    """
+
+    logging.debug(f'{payload=}')
+    response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload)
+    return response.json()
+
+
 def generate_slides_content(topic: str) -> str:
     """
     Generate the outline/contents of slides for a presentation on a given topic.
 
-    :param topic: Topic/subject matter/idea on which slides are to be generated
+    :param topic: Topic on which slides are to be generated
     :return: The content in JSON format
     """
 
-    # global prompt
-    global llm
-
     with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
         template_txt = in_file.read().strip()
         template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
 
-    if llm is None:
-        llm = get_llm(use_gpt=True)
-        print(llm)
-
-    slides_content = llm(template_txt, verbose=True)
-
-    return slides_content
+    output = hf_api_query({
+        "inputs": template_txt,
+        "parameters": {
+            'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
+            'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
+            'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+            'max_new_tokens': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+            'num_return_sequences': 1,
+            'return_full_text': False,
+            # "repetition_penalty": 0.0001
+        },
+        "options": {
+            'wait_for_model': True,
+            'use_cache': True
+        }
+    })
+
+    output = output[0]['generated_text'].strip()
+    # output = output[len(template_txt):]
+    logging.debug(f'{output=}')
+
+    json_end_idx = output.rfind('```')
+    if json_end_idx != -1:
+        # logging.debug(f'{json_end_idx=}')
+        output = output[:json_end_idx]
+
+    return output
 
 
 def get_ai_image(text: str) -> str:
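Two notes on the new code path. First, hf_api_query relies on the requests package; the import is not visible in this hunk and presumably already exists near the top of the file. Second, the rfind('```') trim truncates the output at the last Markdown fence, since instruction-tuned models often close a JSON answer with a fence. A hedged usage sketch, assuming HUGGINGFACEHUB_API_TOKEN is exported before the app starts:

import json

import llm_helper

raw = llm_helper.generate_slides_content('Introduction to Python')
deck = json.loads(raw)  # raises json.JSONDecodeError if the LLM output is malformed
print(deck['title'])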