Switch to Mistral-7B-Instruct-v0.1; invoke HF API end-point directly
- app.py +27 -22
- global_config.py +3 -1
- langchain_templates/template_combined.txt +2 -2
- llm_helper.py +46 -13
app.py
CHANGED
@@ -84,26 +84,26 @@ def get_ai_image_wrapper(text: str) -> str:
     return llm_helper.get_ai_image(text)
 
 
-def get_disk_used_percentage() -> float:
-    """
-    Compute the disk usage.
-
-    :return: Percentage of the disk space currently used
-    """
-
-    total, used, free = shutil.disk_usage(__file__)
-    total = total // GB_CONVERTER
-    used = used // GB_CONVERTER
-    free = free // GB_CONVERTER
-    used_perc = 100.0 * used / total
-
-    logging.debug(f'Total: {total} GB\n'
-                  f'Used: {used} GB\n'
-                  f'Free: {free} GB')
-
-    logging.debug('\n'.join(os.listdir()))
-
-    return used_perc
+# def get_disk_used_percentage() -> float:
+#     """
+#     Compute the disk usage.
+#
+#     :return: Percentage of the disk space currently used
+#     """
+#
+#     total, used, free = shutil.disk_usage(__file__)
+#     total = total // GB_CONVERTER
+#     used = used // GB_CONVERTER
+#     free = free // GB_CONVERTER
+#     used_perc = 100.0 * used / total
+#
+#     logging.debug(f'Total: {total} GB\n'
+#                   f'Used: {used} GB\n'
+#                   f'Free: {free} GB')
+#
+#     logging.debug('\n'.join(os.listdir()))
+#
+#     return used_perc
 
 
 def build_ui():
@@ -115,8 +115,11 @@ def build_ui():
 
     st.title(APP_TEXT['app_name'])
     st.subheader(APP_TEXT['caption'])
-    st.markdown('*Running on GPT-4 at the moment. Image generation has been disabled. '
-                'Will run as long as the community plan supports* :)')
+    st.markdown('Using [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).')
+    st.markdown('*If the JSON is generated or parsed incorrectly, try again later by making minor changes '
+                'to the input text.*')
+    st.markdown('~~*Running on GPT-4 at the moment. Image generation has been disabled. '
+                'Will run as long as the community plan supports* :)~~')
 
     with st.form('my_form'):
         # Topic input
@@ -188,6 +191,8 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
     try:
         # Step 1: Generate the contents in JSON format using an LLM
         json_str = process_slides_contents(topic[:target_length], progress_bar)
+        logging.debug(f'{topic[:target_length]=}')
+        logging.debug(f'{json_str=}')
 
         # Step 2: Generate the slide deck based on the template specified
        if len(json_str) > 0:
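The new logging calls use Python 3.8's self-documenting f-strings. A standalone illustration (not from the app):

    json_str = '{"title": "Demo"}'
    # f'{expr=}' (Python 3.8+) renders both the expression and its value
    print(f'{json_str=}')        # json_str='{"title": "Demo"}'
    print(f'{json_str[:8]=}')    # json_str[:8]='{"title"'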
global_config.py
CHANGED
@@ -22,11 +22,13 @@ class GlobalConfig:
     CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
     CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
 
-    LLM_MODEL_TEMPERATURE: float = 0.2
+    HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1'  # 'HuggingFaceH4/zephyr-7b-beta'
+    LLM_MODEL_TEMPERATURE: float = 0.2  # Mistral and Zephyr require a bit high temperature
     LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
     LLM_MODEL_MAX_OUTPUT_LENGTH: int = 2000
     LLM_MODEL_MAX_INPUT_LENGTH: int = 300
 
+    HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')
     METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY', '')
 
     LOG_LEVEL = 'INFO'
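Both new settings are read from the environment at import time. A minimal sketch of failing fast when the token is missing (the guard itself is an addition, not part of the commit):

    import os

    # Same lookup GlobalConfig performs
    token = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')
    if not token:
        # Without a token, the Inference API will reject or throttle requests
        raise RuntimeError('Set HUGGINGFACEHUB_API_TOKEN before starting the app')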
langchain_templates/template_combined.txt
CHANGED
@@ -1,4 +1,5 @@
 You are a helpful, intelligent chatbot. Create the slides for a presentation on the given topic. Include main headings for each slide, detailed bullet points for each slide. Add relevant content to each slide.
+If relevant, add one or two examples to illustrate the concept.
 
 
 Topic:
@@ -6,7 +7,6 @@ Topic:
 
 
 Desired JSON output format:
-
 {
     "title": "Presentation Title",
     "slides": [
@@ -16,7 +16,7 @@ Desired JSON output format:
             "First bullet point",
             [
                 "Sub-bullet point 1",
-                "Sub-bullet point 2",
+                "Sub-bullet point 2"
             ],
             "Second bullet point"
         ]
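The template asks the model for JSON with a title and slides whose bullet points may nest one level; the trailing-comma fix makes the example valid JSON. A minimal sketch of consuming output in that shape; the slide-level key names ('heading', 'bullet_points') are assumptions, since the diff hides those template lines:

    import json

    # Hypothetical LLM response matching the template's structure
    response = '''{
        "title": "Presentation Title",
        "slides": [
            {
                "heading": "Slide 1",
                "bullet_points": [
                    "First bullet point",
                    ["Sub-bullet point 1", "Sub-bullet point 2"],
                    "Second bullet point"
                ]
            }
        ]
    }'''

    def print_bullets(points, level=0):
        # Strings print as bullets; nested lists recurse one indent deeper
        for item in points:
            if isinstance(item, list):
                print_bullets(item, level + 1)
            else:
                print('    ' * level + '- ' + item)

    deck = json.loads(response)
    print(deck['title'])
    for slide in deck['slides']:
        print(slide['heading'])
        print_bullets(slide['bullet_points'])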
llm_helper.py
CHANGED
@@ -7,17 +7,20 @@ from langchain.llms import Clarifai
 from global_config import GlobalConfig
 
 
+HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
+HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
+
 logging.basicConfig(
     level=GlobalConfig.LOG_LEVEL,
     format='%(asctime)s - %(message)s',
 )
 
-llm = None
+# llm = None
 
 
 def get_llm(use_gpt: bool) -> Clarifai:
     """
-    Get a large language model.
+    Get a large language model (hosted by Clarifai).
 
     :param use_gpt: True if GPT-3.5 is required; False is Llama 2 is required
     """
@@ -45,28 +48,58 @@ def get_llm(use_gpt: bool) -> Clarifai:
     return _
 
 
+def hf_api_query(payload: dict):
+    """
+    Invoke HF inference end-point API.
+
+    :param payload: The prompt for the LLM and related parameters
+    :return: The output from the LLM
+    """
+
+    logging.debug(f'{payload=}')
+    response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload)
+    return response.json()
+
+
 def generate_slides_content(topic: str) -> str:
     """
     Generate the outline/contents of slides for a presentation on a given topic.
 
-    :param topic: Topic
+    :param topic: Topic on which slides are to be generated
     :return: The content in JSON format
     """
 
-    # global prompt
-    global llm
-
     with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
         template_txt = in_file.read().strip()
         template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
 
-
-
-
-
-
-
-
+    output = hf_api_query({
+        "inputs": template_txt,
+        "parameters": {
+            'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
+            'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
+            'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+            'max_new_tokens': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+            'num_return_sequences': 1,
+            'return_full_text': False,
+            # "repetition_penalty": 0.0001
+        },
+        "options": {
+            'wait_for_model': True,
+            'use_cache': True
+        }
+    })
+
+    output = output[0]['generated_text'].strip()
+    # output = output[len(template_txt):]
+    logging.debug(f'{output=}')
+
+    json_end_idx = output.rfind('```')
+    if json_end_idx != -1:
+        # logging.debug(f'{json_end_idx=}')
+        output = output[:json_end_idx]
+
+    return output
 
 
 def get_ai_image(text: str) -> str:
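The gist of the change: generate_slides_content now POSTs the filled-in prompt straight to the Hugging Face Inference API instead of going through a LangChain LLM wrapper. A self-contained sketch of the same call (parameter values mirror GlobalConfig; raise_for_status() is an addition, not part of the commit):

    import os
    import requests

    MODEL = 'mistralai/Mistral-7B-Instruct-v0.1'
    API_URL = f'https://api-inference.huggingface.co/models/{MODEL}'
    HEADERS = {'Authorization': f"Bearer {os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')}"}

    def query(prompt: str) -> str:
        response = requests.post(API_URL, headers=HEADERS, json={
            'inputs': prompt,
            'parameters': {
                'temperature': 0.2,          # LLM_MODEL_TEMPERATURE
                'max_new_tokens': 2000,      # LLM_MODEL_MAX_OUTPUT_LENGTH
                'return_full_text': False,
            },
            'options': {'wait_for_model': True, 'use_cache': True},
        })
        response.raise_for_status()
        # The text-generation endpoint returns a list of generated sequences
        return response.json()[0]['generated_text'].strip()

    print(query('Create the slides for a presentation on renewable energy.'))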