barunsaha committed on
Commit
6d7d653
1 Parent(s): 1d82a0b

Pylint & CVE fix (#1)

Browse files

- Fix (some) Pylint warnings & remove some unused code (ae312d00decf61d78caf4d9aba0cd2239eb93aeb)
- Upgrade package versions (4785fdbdfadefafdcfd3da9f323c3c1bcdb02eb6)

Files changed (5) hide show
  1. app.py +30 -34
  2. global_config.py +2 -16
  3. llm_helper.py +10 -88
  4. pptx_helper.py +11 -6
  5. requirements.txt +2 -2
app.py CHANGED
@@ -12,7 +12,7 @@ import pptx_helper
12
  from global_config import GlobalConfig
13
 
14
 
15
- APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r').read())
16
  GB_CONVERTER = 2 ** 30
17
 
18
 
@@ -68,18 +68,6 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
68
  return results
69
 
70
 
71
- @st.cache_data
72
- def get_ai_image_wrapper(text: str) -> str:
73
- """
74
- Fetch and cache a Base 64-encoded image by calling an external API.
75
-
76
- :param text: The image prompt
77
- :return: The Base 64-encoded image
78
- """
79
-
80
- return llm_helper.get_ai_image(text)
81
-
82
-
83
  # def get_disk_used_percentage() -> float:
84
  # """
85
  # Compute the disk usage.
@@ -111,14 +99,19 @@ def build_ui():
111
 
112
  st.title(APP_TEXT['app_name'])
113
  st.subheader(APP_TEXT['caption'])
114
- st.markdown('Powered by [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).')
115
- st.markdown('*If the JSON is generated or parsed incorrectly, try again later by making minor changes '
116
- 'to the input text.*')
 
 
 
 
 
117
 
118
  with st.form('my_form'):
119
  # Topic input
120
  try:
121
- with open(GlobalConfig.PRELOAD_DATA_FILE, 'r') as in_file:
122
  preload_data = json5.loads(in_file.read())
123
  except (FileExistsError, FileNotFoundError):
124
  preload_data = {'topic': '', 'audience': ''}
@@ -158,7 +151,8 @@ def build_ui():
158
  st.text(APP_TEXT['tos2'])
159
 
160
  st.markdown(
161
- '![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbarunsaha%2Fslide-deck-ai&countColor=%23263759)'
 
162
  )
163
 
164
 
@@ -173,20 +167,17 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
173
  """
174
 
175
  topic_length = len(topic)
176
- logging.debug(f'Input length:: topic: {topic_length}')
177
 
178
  if topic_length >= 10:
179
- logging.debug(
180
- f'Topic: {topic}\n'
181
- )
182
-
183
  target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
184
 
185
  try:
186
  # Step 1: Generate the contents in JSON format using an LLM
187
  json_str = process_slides_contents(topic[:target_length], progress_bar)
188
- logging.debug(f'{topic[:target_length]=}')
189
- logging.debug(f'{len(json_str)=}')
190
 
191
  # Step 2: Generate the slide deck based on the template specified
192
  if len(json_str) > 0:
@@ -196,8 +187,10 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
196
  icon="💡️"
197
  )
198
  else:
199
- st.error('Unfortunately, JSON generation failed, so the next steps would lead to nowhere.'
200
- ' Try again or come back later.')
 
 
201
  return
202
 
203
  all_headers = generate_slide_deck(json_str, pptx_template, progress_bar)
@@ -225,15 +218,14 @@ def process_slides_contents(text: str, progress_bar: st.progress) -> str:
225
  json_str = ''
226
 
227
  try:
228
- logging.info(f'Calling LLM for content generation on the topic: {text}')
229
  json_str = get_contents_wrapper(text)
230
  except Exception as ex:
231
- st.error(f'An exception occurred while trying to convert to JSON.'
232
- f' It could be because of heavy traffic or something else.'
233
- f' Try doing it again or try again later.\n'
234
- f' Error message: {ex}')
235
-
236
- # logging.debug(f'JSON: {json_str}')
237
 
238
  progress_bar.progress(50, text='Contents generated')
239
 
@@ -316,6 +308,10 @@ def show_bonus_stuff(ppt_headers: List[str]):
316
 
317
 
318
  def main():
 
 
 
 
319
  build_ui()
320
 
321
 
 
12
  from global_config import GlobalConfig
13
 
14
 
15
+ APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r', encoding='utf-8').read())
16
  GB_CONVERTER = 2 ** 30
17
 
18
 
 
68
  return results
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # def get_disk_used_percentage() -> float:
72
  # """
73
  # Compute the disk usage.
 
99
 
100
  st.title(APP_TEXT['app_name'])
101
  st.subheader(APP_TEXT['caption'])
102
+ st.markdown(
103
+ 'Powered by'
104
+ ' [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).'
105
+ )
106
+ st.markdown(
107
+ '*If the JSON is generated or parsed incorrectly, try again later by making minor changes'
108
+ ' to the input text.*'
109
+ )
110
 
111
  with st.form('my_form'):
112
  # Topic input
113
  try:
114
+ with open(GlobalConfig.PRELOAD_DATA_FILE, 'r', encoding='utf-8') as in_file:
115
  preload_data = json5.loads(in_file.read())
116
  except (FileExistsError, FileNotFoundError):
117
  preload_data = {'topic': '', 'audience': ''}
 
151
  st.text(APP_TEXT['tos2'])
152
 
153
  st.markdown(
154
+ '![Visitors]'
155
+ '(https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbarunsaha%2Fslide-deck-ai&countColor=%23263759)'
156
  )
157
 
158
 
 
167
  """
168
 
169
  topic_length = len(topic)
170
+ logging.debug('Input length:: topic: %s', topic_length)
171
 
172
  if topic_length >= 10:
173
+ logging.debug('Topic: %s', topic)
 
 
 
174
  target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
175
 
176
  try:
177
  # Step 1: Generate the contents in JSON format using an LLM
178
  json_str = process_slides_contents(topic[:target_length], progress_bar)
179
+ logging.debug('Truncated topic: %s', topic[:target_length])
180
+ logging.debug('Length of JSON: %d', len(json_str))
181
 
182
  # Step 2: Generate the slide deck based on the template specified
183
  if len(json_str) > 0:
 
187
  icon="💡️"
188
  )
189
  else:
190
+ st.error(
191
+ 'Unfortunately, JSON generation failed, so the next steps would lead'
192
+ ' to nowhere. Try again or come back later.'
193
+ )
194
  return
195
 
196
  all_headers = generate_slide_deck(json_str, pptx_template, progress_bar)
 
218
  json_str = ''
219
 
220
  try:
221
+ logging.info('Calling LLM for content generation on the topic: %s', text)
222
  json_str = get_contents_wrapper(text)
223
  except Exception as ex:
224
+ st.error(
225
+ f'An exception occurred while trying to convert to JSON. It could be because of heavy'
226
+ f' traffic or something else. Try doing it again or try again later.'
227
+ f'\nError message: {ex}'
228
+ )
 
229
 
230
  progress_bar.progress(50, text='Contents generated')
231
 
 
308
 
309
 
310
  def main():
311
+ """
312
+ Trigger application run.
313
+ """
314
+
315
  build_ui()
316
 
317
 
global_config.py CHANGED
@@ -1,6 +1,7 @@
 
 
1
  from dataclasses import dataclass
2
  from dotenv import load_dotenv
3
- import os
4
 
5
 
6
  load_dotenv()
@@ -8,20 +9,6 @@ load_dotenv()
8
 
9
  @dataclass(frozen=True)
10
  class GlobalConfig:
11
- # CLARIFAI_PAT = os.environ.get('CLARIFAI_PAT', '')
12
- # CLARIFAI_USER_ID = 'meta'
13
- # CLARIFAI_APP_ID = 'Llama-2'
14
- # CLARIFAI_MODEL_ID = 'llama2-13b-chat'
15
- #
16
- # CLARIFAI_USER_ID_GPT = 'openai'
17
- # CLARIFAI_APP_ID_GPT = 'chat-completion'
18
- # CLARIFAI_MODEL_ID_GPT = 'GPT-4' # 'GPT-3_5-turbo'
19
- #
20
- # CLARIFAI_USER_ID_SD = 'stability-ai'
21
- # CLARIFAI_APP_ID_SD = 'stable-diffusion-2'
22
- # CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
23
- # CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
24
-
25
  HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.2'
26
  LLM_MODEL_TEMPERATURE: float = 0.2
27
  LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
@@ -51,4 +38,3 @@ class GlobalConfig:
51
  'caption': 'Marvel in a monochrome dream'
52
  }
53
  }
54
-
 
1
+ import os
2
+
3
  from dataclasses import dataclass
4
  from dotenv import load_dotenv
 
5
 
6
 
7
  load_dotenv()
 
9
 
10
  @dataclass(frozen=True)
11
  class GlobalConfig:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.2'
13
  LLM_MODEL_TEMPERATURE: float = 0.2
14
  LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
 
38
  'caption': 'Marvel in a monochrome dream'
39
  }
40
  }
 
llm_helper.py CHANGED
@@ -1,8 +1,5 @@
1
- import json
2
  import logging
3
- import time
4
  import requests
5
- from langchain.llms import Clarifai
6
 
7
  from global_config import GlobalConfig
8
 
@@ -18,36 +15,6 @@ logging.basicConfig(
18
  # llm = None
19
 
20
 
21
- def get_llm(use_gpt: bool) -> Clarifai:
22
- """
23
- Get a large language model (hosted by Clarifai).
24
-
25
- :param use_gpt: True if GPT-3.5 is required; False is Llama 2 is required
26
- """
27
-
28
- if use_gpt:
29
- _ = Clarifai(
30
- pat=GlobalConfig.CLARIFAI_PAT,
31
- user_id=GlobalConfig.CLARIFAI_USER_ID_GPT,
32
- app_id=GlobalConfig.CLARIFAI_APP_ID_GPT,
33
- model_id=GlobalConfig.CLARIFAI_MODEL_ID_GPT,
34
- verbose=True,
35
- # temperature=0.1,
36
- )
37
- else:
38
- _ = Clarifai(
39
- pat=GlobalConfig.CLARIFAI_PAT,
40
- user_id=GlobalConfig.CLARIFAI_USER_ID,
41
- app_id=GlobalConfig.CLARIFAI_APP_ID,
42
- model_id=GlobalConfig.CLARIFAI_MODEL_ID,
43
- verbose=True,
44
- # temperature=0.1,
45
- )
46
- # print(llm)
47
-
48
- return _
49
-
50
-
51
  def hf_api_query(payload: dict):
52
  """
53
  Invoke HF inference end-point API.
@@ -56,9 +23,14 @@ def hf_api_query(payload: dict):
56
  :return: The output from the LLM
57
  """
58
 
59
- # logging.debug(f'{payload=}')
60
- response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload)
61
- return response.json()
 
 
 
 
 
62
 
63
 
64
  def generate_slides_content(topic: str) -> str:
@@ -69,7 +41,7 @@ def generate_slides_content(topic: str) -> str:
69
  :return: The content in JSON format
70
  """
71
 
72
- with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
73
  template_txt = in_file.read().strip()
74
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
75
 
@@ -98,61 +70,11 @@ def generate_slides_content(topic: str) -> str:
98
  # logging.debug(f'{json_end_idx=}')
99
  output = output[:json_end_idx]
100
 
101
- logging.debug(f'{output=}')
102
 
103
  return output
104
 
105
 
106
- def get_ai_image(text: str) -> str:
107
- """
108
- Get a Stable Diffusion-generated image based on a given text.
109
-
110
- :param text: The input text
111
- :return: The Base 64-encoded image
112
- """
113
-
114
- url = f'''https://api.clarifai.com/v2/users/{GlobalConfig.CLARIFAI_USER_ID_SD}/apps/{GlobalConfig.CLARIFAI_APP_ID_SD}/models/{GlobalConfig.CLARIFAI_MODEL_ID_SD}/versions/{GlobalConfig.CLARIFAI_MODEL_VERSION_ID_SD}/outputs'''
115
- headers = {
116
- "Content-Type": "application/json",
117
- "Authorization": f'Key {GlobalConfig.CLARIFAI_PAT}'
118
- }
119
- data = {
120
- "inputs": [
121
- {
122
- "data": {
123
- "text": {
124
- "raw": text
125
- }
126
- }
127
- }
128
- ]
129
- }
130
-
131
- # print('*** AI image generator...')
132
- # print(url)
133
-
134
- start = time.time()
135
- response = requests.post(
136
- url=url,
137
- headers=headers,
138
- data=json.dumps(data)
139
- )
140
- stop = time.time()
141
-
142
- # print('Response:', response, response.status_code)
143
- logging.debug('Image generation took', stop - start, 'seconds')
144
- img_data = ''
145
-
146
- if response.ok:
147
- # print('*** Clarifai SDXL request: Response OK')
148
- json_data = json.loads(response.text)
149
- img_data = json_data['outputs'][0]['data']['image']['base64']
150
- else:
151
- logging.error('*** Image generation failed:', response.text)
152
-
153
- return img_data
154
-
155
-
156
  if __name__ == '__main__':
157
  # results = get_related_websites('5G AI WiFi 6')
158
  #
 
 
1
  import logging
 
2
  import requests
 
3
 
4
  from global_config import GlobalConfig
5
 
 
15
  # llm = None
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def hf_api_query(payload: dict):
19
  """
20
  Invoke HF inference end-point API.
 
23
  :return: The output from the LLM
24
  """
25
 
26
+ try:
27
+ response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
28
+ result = response.json()
29
+ except requests.exceptions.Timeout as te:
30
+ logging.error('*** Error: hf_api_query timeout! %s', str(te))
31
+ result = {}
32
+
33
+ return result
34
 
35
 
36
  def generate_slides_content(topic: str) -> str:
 
41
  :return: The content in JSON format
42
  """
43
 
44
+ with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
45
  template_txt = in_file.read().strip()
46
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
47
 
 
70
  # logging.debug(f'{json_end_idx=}')
71
  output = output[:json_end_idx]
72
 
73
+ logging.debug('generate_slides_content: output: %s', output)
74
 
75
  return output
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  if __name__ == '__main__':
79
  # results = get_related_websites('5G AI WiFi 6')
80
  #
pptx_helper.py CHANGED
@@ -1,10 +1,11 @@
 
1
  import pathlib
 
2
  import tempfile
3
  from typing import List, Tuple
 
4
  import json5
5
- import logging
6
  import pptx
7
- import re
8
  import yaml
9
 
10
  from global_config import GlobalConfig
@@ -57,7 +58,8 @@ def generate_powerpoint_presentation(
57
  """
58
  Create and save a PowerPoint presentation file containing the contents in JSON or YAML format.
59
 
60
- :param structured_data: The presentation contents as "JSON" (may contain trailing commas) or YAML
 
61
  :param as_yaml: True if the input data is in YAML format; False if it is in JSON format
62
  :param slides_template: The PPTX template to use
63
  :param output_file_path: The path of the PPTX file to save as
@@ -69,13 +71,16 @@ def generate_powerpoint_presentation(
69
  try:
70
  parsed_data = yaml.safe_load(structured_data)
71
  except yaml.parser.ParserError as ype:
72
- logging.error(f'*** YAML parse error: {ype}')
73
  parsed_data = {'title': '', 'slides': []}
74
  else:
75
  # The structured "JSON" might contain trailing commas, so using json5
76
  parsed_data = json5.loads(structured_data)
77
 
78
- logging.debug(f"*** Using PPTX template: {GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']}")
 
 
 
79
  presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])
80
 
81
  # The title slide
@@ -84,7 +89,7 @@ def generate_powerpoint_presentation(
84
  title = slide.shapes.title
85
  subtitle = slide.placeholders[1]
86
  title.text = parsed_data['title']
87
- logging.debug(f'Title is: {title.text}')
88
  subtitle.text = 'by Myself and SlideDeck AI :)'
89
  all_headers = [title.text, ]
90
 
 
1
+ import logging
2
  import pathlib
3
+ import re
4
  import tempfile
5
  from typing import List, Tuple
6
+
7
  import json5
 
8
  import pptx
 
9
  import yaml
10
 
11
  from global_config import GlobalConfig
 
58
  """
59
  Create and save a PowerPoint presentation file containing the contents in JSON or YAML format.
60
 
61
+ :param structured_data: The presentation contents as "JSON" (may contain trailing commas) or
62
+ YAML
63
  :param as_yaml: True if the input data is in YAML format; False if it is in JSON format
64
  :param slides_template: The PPTX template to use
65
  :param output_file_path: The path of the PPTX file to save as
 
71
  try:
72
  parsed_data = yaml.safe_load(structured_data)
73
  except yaml.parser.ParserError as ype:
74
+ logging.error('*** YAML parse error: %s', str(ype))
75
  parsed_data = {'title': '', 'slides': []}
76
  else:
77
  # The structured "JSON" might contain trailing commas, so using json5
78
  parsed_data = json5.loads(structured_data)
79
 
80
+ logging.debug(
81
+ "*** Using PPTX template: %s",
82
+ GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
83
+ )
84
  presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])
85
 
86
  # The title slide
 
89
  title = slide.shapes.title
90
  subtitle = slide.placeholders[1]
91
  title.text = parsed_data['title']
92
+ logging.debug('Presentation title is: %s', title.text)
93
  subtitle.text = 'by Myself and SlideDeck AI :)'
94
  all_headers = [title.text, ]
95
 
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  python-dotenv[cli]~=1.0.0
2
- langchain~=0.0.273
3
  # huggingface_hub
4
- streamlit~=1.26.0
5
  clarifai==9.7.4
6
 
7
  python-pptx
 
1
  python-dotenv[cli]~=1.0.0
2
+ langchain~=0.1.13
3
  # huggingface_hub
4
+ streamlit~=1.32.2
5
  clarifai==9.7.4
6
 
7
  python-pptx