barunsaha committed on
Commit
9c0dccd
1 Parent(s): 4f6ea2c

Reorganize files and set logging format globally

Browse files
app.py CHANGED
@@ -7,8 +7,7 @@ import json5
7
  import metaphor_python as metaphor
8
  import streamlit as st
9
 
10
- import llm_helper
11
- import pptx_helper
12
  from global_config import GlobalConfig
13
 
14
 
@@ -16,10 +15,7 @@ APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r', encoding='utf-8'
16
  GB_CONVERTER = 2 ** 30
17
 
18
 
19
- logging.basicConfig(
20
- level=GlobalConfig.LOG_LEVEL,
21
- format='%(asctime)s - %(message)s',
22
- )
23
 
24
 
25
  @st.cache_data
@@ -27,11 +23,11 @@ def get_contents_wrapper(text: str) -> str:
27
  """
28
  Fetch and cache the slide deck contents on a topic by calling an external API.
29
 
30
- :param text: The presentation topic
31
- :return: The slide deck contents or outline in JSON format
32
  """
33
 
34
- logging.info('LLM call because of cache miss...')
35
  return llm_helper.generate_slides_content(text).strip()
36
 
37
 
@@ -40,7 +36,7 @@ def get_metaphor_client_wrapper() -> metaphor.Metaphor:
40
  """
41
  Create a Metaphor client for semantic Web search.
42
 
43
- :return: Metaphor instance
44
  """
45
 
46
  return metaphor.Metaphor(api_key=GlobalConfig.METAPHOR_API_KEY)
@@ -51,8 +47,8 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
51
  """
52
  Fetch and cache the Web search results on a given topic.
53
 
54
- :param text: The topic
55
- :return: A list of (title, link) tuples
56
  """
57
 
58
  results = []
@@ -68,28 +64,6 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
68
  return results
69
 
70
 
71
- # def get_disk_used_percentage() -> float:
72
- # """
73
- # Compute the disk usage.
74
- #
75
- # :return: Percentage of the disk space currently used
76
- # """
77
- #
78
- # total, used, free = shutil.disk_usage(__file__)
79
- # total = total // GB_CONVERTER
80
- # used = used // GB_CONVERTER
81
- # free = free // GB_CONVERTER
82
- # used_perc = 100.0 * used / total
83
- #
84
- # logging.debug(f'Total: {total} GB\n'
85
- # f'Used: {used} GB\n'
86
- # f'Free: {free} GB')
87
- #
88
- # logging.debug('\n'.join(os.listdir()))
89
- #
90
- # return used_perc
91
-
92
-
93
  def build_ui():
94
  """
95
  Display the input elements for content generation. Only covers the first step.
@@ -160,24 +134,23 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
160
  """
161
  Process the inputs to generate the slides.
162
 
163
- :param topic: The presentation topic based on which contents are to be generated
164
- :param pptx_template: The PowerPoint template name to be used
165
- :param progress_bar: Progress bar from the page
166
- :return:
167
  """
168
 
169
  topic_length = len(topic)
170
- logging.debug('Input length:: topic: %s', topic_length)
171
 
172
  if topic_length >= 10:
173
- logging.debug('Topic: %s', topic)
174
  target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
175
 
176
  try:
177
  # Step 1: Generate the contents in JSON format using an LLM
178
  json_str = process_slides_contents(topic[:target_length], progress_bar)
179
- logging.debug('Truncated topic: %s', topic[:target_length])
180
- logging.debug('Length of JSON: %d', len(json_str))
181
 
182
  # Step 2: Generate the slide deck based on the template specified
183
  if len(json_str) > 0:
@@ -210,15 +183,15 @@ def process_slides_contents(text: str, progress_bar: st.progress) -> str:
210
  """
211
  Convert given text into structured data and display. Update the UI.
212
 
213
- :param text: The topic description for the presentation
214
- :param progress_bar: Progress bar for this step
215
- :return: The contents as a JSON-formatted string
216
  """
217
 
218
  json_str = ''
219
 
220
  try:
221
- logging.info('Calling LLM for content generation on the topic: %s', text)
222
  json_str = get_contents_wrapper(text)
223
  except Exception as ex:
224
  st.error(
@@ -239,10 +212,10 @@ def generate_slide_deck(json_str: str, pptx_template: str, progress_bar) -> List
239
  """
240
  Create a slide deck.
241
 
242
- :param json_str: The contents in JSON format
243
- :param pptx_template: The PPTX template name
244
- :param progress_bar: Progress bar
245
- :return: A list of all slide headers and the title
246
  """
247
 
248
  progress_text = 'Creating the slide deck...give it a moment'
@@ -257,7 +230,7 @@ def generate_slide_deck(json_str: str, pptx_template: str, progress_bar) -> List
257
  temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
258
  path = pathlib.Path(temp.name)
259
 
260
- logging.info('Creating PPTX file...')
261
  all_headers = pptx_helper.generate_powerpoint_presentation(
262
  json_str,
263
  slides_template=pptx_template,
@@ -279,7 +252,7 @@ def show_bonus_stuff(ppt_headers: List[str]):
279
  """
280
 
281
  # Use the presentation title and the slide headers to find relevant info online
282
- logging.info('Calling Metaphor search...')
283
  ppt_text = ' '.join(ppt_headers)
284
  search_results = get_web_search_results_wrapper(ppt_text)
285
  md_text_items = []
@@ -290,11 +263,11 @@ def show_bonus_stuff(ppt_headers: List[str]):
290
  with st.expander('Related Web references'):
291
  st.markdown('\n\n'.join(md_text_items))
292
 
293
- logging.info('Done!')
294
 
295
  # # Avoid image generation. It costs time and an API call, so just limit to the text generation.
296
  # with st.expander('AI-generated image on the presentation topic'):
297
- # logging.info('Calling SDXL for image generation...')
298
  # # img_empty.write('')
299
  # # img_text.write(APP_TEXT['image_info'])
300
  # image = get_ai_image_wrapper(ppt_text)
@@ -303,7 +276,7 @@ def show_bonus_stuff(ppt_headers: List[str]):
303
  # image = base64.b64decode(image)
304
  # st.image(image, caption=ppt_text)
305
  # st.info('Tip: Right-click on the image to save it.', icon="💡️")
306
- # logging.info('Image added')
307
 
308
 
309
  def main():
 
7
  import metaphor_python as metaphor
8
  import streamlit as st
9
 
10
+ from helpers import llm_helper, pptx_helper
 
11
  from global_config import GlobalConfig
12
 
13
 
 
15
  GB_CONVERTER = 2 ** 30
16
 
17
 
18
+ logger = logging.getLogger(__name__)
 
 
 
19
 
20
 
21
  @st.cache_data
 
23
  """
24
  Fetch and cache the slide deck contents on a topic by calling an external API.
25
 
26
+ :param text: The presentation topic.
27
+ :return: The slide deck contents or outline in JSON format.
28
  """
29
 
30
+ logger.info('LLM call because of cache miss...')
31
  return llm_helper.generate_slides_content(text).strip()
32
 
33
 
 
36
  """
37
  Create a Metaphor client for semantic Web search.
38
 
39
+ :return: Metaphor instance.
40
  """
41
 
42
  return metaphor.Metaphor(api_key=GlobalConfig.METAPHOR_API_KEY)
 
47
  """
48
  Fetch and cache the Web search results on a given topic.
49
 
50
+ :param text: The topic.
51
+ :return: A list of (title, link) tuples.
52
  """
53
 
54
  results = []
 
64
  return results
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def build_ui():
68
  """
69
  Display the input elements for content generation. Only covers the first step.
 
134
  """
135
  Process the inputs to generate the slides.
136
 
137
+ :param topic: The presentation topic based on which contents are to be generated.
138
+ :param pptx_template: The PowerPoint template name to be used.
139
+ :param progress_bar: Progress bar from the page.
 
140
  """
141
 
142
  topic_length = len(topic)
143
+ logger.debug('Input length:: topic: %s', topic_length)
144
 
145
  if topic_length >= 10:
146
+ logger.debug('Topic: %s', topic)
147
  target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
148
 
149
  try:
150
  # Step 1: Generate the contents in JSON format using an LLM
151
  json_str = process_slides_contents(topic[:target_length], progress_bar)
152
+ logger.debug('Truncated topic: %s', topic[:target_length])
153
+ logger.debug('Length of JSON: %d', len(json_str))
154
 
155
  # Step 2: Generate the slide deck based on the template specified
156
  if len(json_str) > 0:
 
183
  """
184
  Convert given text into structured data and display. Update the UI.
185
 
186
+ :param text: The topic description for the presentation.
187
+ :param progress_bar: Progress bar for this step.
188
+ :return: The contents as a JSON-formatted string.
189
  """
190
 
191
  json_str = ''
192
 
193
  try:
194
+ logger.info('Calling LLM for content generation on the topic: %s', text)
195
  json_str = get_contents_wrapper(text)
196
  except Exception as ex:
197
  st.error(
 
212
  """
213
  Create a slide deck.
214
 
215
+ :param json_str: The contents in JSON format.
216
+ :param pptx_template: The PPTX template name.
217
+ :param progress_bar: Progress bar.
218
+ :return: A list of all slide headers and the title.
219
  """
220
 
221
  progress_text = 'Creating the slide deck...give it a moment'
 
230
  temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
231
  path = pathlib.Path(temp.name)
232
 
233
+ logger.info('Creating PPTX file...')
234
  all_headers = pptx_helper.generate_powerpoint_presentation(
235
  json_str,
236
  slides_template=pptx_template,
 
252
  """
253
 
254
  # Use the presentation title and the slide headers to find relevant info online
255
+ logger.info('Calling Metaphor search...')
256
  ppt_text = ' '.join(ppt_headers)
257
  search_results = get_web_search_results_wrapper(ppt_text)
258
  md_text_items = []
 
263
  with st.expander('Related Web references'):
264
  st.markdown('\n\n'.join(md_text_items))
265
 
266
+ logger.info('Done!')
267
 
268
  # # Avoid image generation. It costs time and an API call, so just limit to the text generation.
269
  # with st.expander('AI-generated image on the presentation topic'):
270
+ # logger.info('Calling SDXL for image generation...')
271
  # # img_empty.write('')
272
  # # img_text.write(APP_TEXT['image_info'])
273
  # image = get_ai_image_wrapper(ppt_text)
 
276
  # image = base64.b64decode(image)
277
  # st.image(image, caption=ppt_text)
278
  # st.info('Tip: Right-click on the image to save it.', icon="💡️")
279
+ # logger.info('Image added')
280
 
281
 
282
  def main():
global_config.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
 
3
  from dataclasses import dataclass
@@ -38,3 +39,10 @@ class GlobalConfig:
38
  'caption': 'Marvel in a monochrome dream'
39
  }
40
  }
 
 
 
 
 
 
 
 
1
+ import logging
2
  import os
3
 
4
  from dataclasses import dataclass
 
39
  'caption': 'Marvel in a monochrome dream'
40
  }
41
  }
42
+
43
+
44
+ logging.basicConfig(
45
+ level=GlobalConfig.LOG_LEVEL,
46
+ format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
47
+ datefmt='%Y-%m-%d %H:%M:%S'
48
+ )
helpers/__init__.py ADDED
File without changes
llm_helper.py → helpers/llm_helper.py RENAMED
@@ -1,5 +1,7 @@
1
  import logging
2
  import requests
 
 
3
 
4
  from global_config import GlobalConfig
5
 
@@ -7,27 +9,45 @@ from global_config import GlobalConfig
7
  HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
8
  HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
9
 
10
- logging.basicConfig(
11
- level=GlobalConfig.LOG_LEVEL,
12
- format='%(asctime)s - %(message)s',
13
- )
14
 
15
- # llm = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
- def hf_api_query(payload: dict):
19
  """
20
  Invoke HF inference end-point API.
21
 
22
- :param payload: The prompt for the LLM and related parameters
23
- :return: The output from the LLM
24
  """
25
 
26
  try:
27
  response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
28
  result = response.json()
29
  except requests.exceptions.Timeout as te:
30
- logging.error('*** Error: hf_api_query timeout! %s', str(te))
31
  result = {}
32
 
33
  return result
@@ -37,8 +57,8 @@ def generate_slides_content(topic: str) -> str:
37
  """
38
  Generate the outline/contents of slides for a presentation on a given topic.
39
 
40
- :param topic: Topic on which slides are to be generated
41
- :return: The content in JSON format
42
  """
43
 
44
  with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
@@ -46,8 +66,8 @@ def generate_slides_content(topic: str) -> str:
46
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
47
 
48
  output = hf_api_query({
49
- "inputs": template_txt,
50
- "parameters": {
51
  'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
52
  'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
53
  'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
@@ -56,7 +76,7 @@ def generate_slides_content(topic: str) -> str:
56
  'return_full_text': False,
57
  # "repetition_penalty": 0.0001
58
  },
59
- "options": {
60
  'wait_for_model': True,
61
  'use_cache': True
62
  }
@@ -70,7 +90,7 @@ def generate_slides_content(topic: str) -> str:
70
  # logging.debug(f'{json_end_idx=}')
71
  output = output[:json_end_idx]
72
 
73
- logging.debug('generate_slides_content: output: %s', output)
74
 
75
  return output
76
 
 
1
  import logging
2
  import requests
3
+ from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
4
+ from langchain_core.language_models import LLM
5
 
6
  from global_config import GlobalConfig
7
 
 
9
  HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
10
  HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
11
 
12
+ logger = logging.getLogger(__name__)
 
 
 
13
 
14
+
15
+ def get_hf_endpoint() -> LLM:
16
+ """
17
+ Get an LLM via the HuggingFaceEndpoint.
18
+
19
+ :return: The LLM.
20
+ """
21
+
22
+ logger.debug('Getting LLM via HF endpoint')
23
+
24
+ return HuggingFaceEndpoint(
25
+ repo_id=GlobalConfig.HF_LLM_MODEL_NAME,
26
+ max_new_tokens=GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
27
+ top_k=40,
28
+ top_p=0.95,
29
+ temperature=GlobalConfig.LLM_MODEL_TEMPERATURE,
30
+ repetition_penalty=1.03,
31
+ streaming=True,
32
+ huggingfacehub_api_token=GlobalConfig.HUGGINGFACEHUB_API_TOKEN,
33
+ return_full_text=False,
34
+ stop_sequences=['</s>'],
35
+ )
36
 
37
 
38
+ def hf_api_query(payload: dict) -> dict:
39
  """
40
  Invoke HF inference end-point API.
41
 
42
+ :param payload: The prompt for the LLM and related parameters.
43
+ :return: The output from the LLM.
44
  """
45
 
46
  try:
47
  response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
48
  result = response.json()
49
  except requests.exceptions.Timeout as te:
50
+ logger.error('*** Error: hf_api_query timeout! %s', str(te))
51
  result = {}
52
 
53
  return result
 
57
  """
58
  Generate the outline/contents of slides for a presentation on a given topic.
59
 
60
+ :param topic: Topic on which slides are to be generated.
61
+ :return: The content in JSON format.
62
  """
63
 
64
  with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
 
66
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
67
 
68
  output = hf_api_query({
69
+ 'inputs': template_txt,
70
+ 'parameters': {
71
  'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
72
  'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
73
  'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
 
76
  'return_full_text': False,
77
  # "repetition_penalty": 0.0001
78
  },
79
+ 'options': {
80
  'wait_for_model': True,
81
  'use_cache': True
82
  }
 
90
  # logging.debug(f'{json_end_idx=}')
91
  output = output[:json_end_idx]
92
 
93
+ logger.debug('generate_slides_content: output: %s', output)
94
 
95
  return output
96
 
pptx_helper.py → helpers/pptx_helper.py RENAMED
@@ -2,6 +2,7 @@ import logging
2
  import pathlib
3
  import re
4
  import tempfile
 
5
  from typing import List, Tuple
6
 
7
  import json5
@@ -28,17 +29,14 @@ SAMPLE_JSON_FOR_PPTX = '''
28
  }
29
  '''
30
 
31
- logging.basicConfig(
32
- level=GlobalConfig.LOG_LEVEL,
33
- format='%(asctime)s - %(message)s',
34
- )
35
 
36
 
37
  def remove_slide_number_from_heading(header: str) -> str:
38
  """
39
  Remove the slide number from a given slide header.
40
 
41
- :param header: The header of a slide
42
  """
43
 
44
  if PATTERN.match(header):
@@ -56,16 +54,16 @@ def generate_powerpoint_presentation(
56
  """
57
  Create and save a PowerPoint presentation file containing the content in JSON format.
58
 
59
- :param structured_data: The presentation contents as "JSON" (may contain trailing commas)
60
- :param slides_template: The PPTX template to use
61
- :param output_file_path: The path of the PPTX file to save as
62
- :return A list of presentation title and slides headers
63
  """
64
 
65
  # The structured "JSON" might contain trailing commas, so using json5
66
  parsed_data = json5.loads(structured_data)
67
 
68
- logging.debug(
69
  "*** Using PPTX template: %s",
70
  GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
71
  )
@@ -77,7 +75,7 @@ def generate_powerpoint_presentation(
77
  title = slide.shapes.title
78
  subtitle = slide.placeholders[1]
79
  title.text = parsed_data['title']
80
- logging.debug('Presentation title is: %s', title.text)
81
  subtitle.text = 'by Myself and SlideDeck AI :)'
82
  all_headers = [title.text, ]
83
 
@@ -125,9 +123,9 @@ def get_flat_list_of_contents(items: list, level: int) -> List[Tuple]:
125
  Flatten a (hierarchical) list of bullet points to a single list containing each item and
126
  its level.
127
 
128
- :param items: A bullet point (string or list)
129
- :param level: The current level of hierarchy
130
- :return: A list of (bullet item text, hierarchical level) tuples
131
  """
132
 
133
  flat_list = []
 
2
  import pathlib
3
  import re
4
  import tempfile
5
+
6
  from typing import List, Tuple
7
 
8
  import json5
 
29
  }
30
  '''
31
 
32
+ logger = logging.getLogger(__name__)
 
 
 
33
 
34
 
35
  def remove_slide_number_from_heading(header: str) -> str:
36
  """
37
  Remove the slide number from a given slide header.
38
 
39
+ :param header: The header of a slide.
40
  """
41
 
42
  if PATTERN.match(header):
 
54
  """
55
  Create and save a PowerPoint presentation file containing the content in JSON format.
56
 
57
+ :param structured_data: The presentation contents as "JSON" (may contain trailing commas).
58
+ :param slides_template: The PPTX template to use.
59
+ :param output_file_path: The path of the PPTX file to save as.
60
+ :return A list of presentation title and slides headers.
61
  """
62
 
63
  # The structured "JSON" might contain trailing commas, so using json5
64
  parsed_data = json5.loads(structured_data)
65
 
66
+ logger.debug(
67
  "*** Using PPTX template: %s",
68
  GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
69
  )
 
75
  title = slide.shapes.title
76
  subtitle = slide.placeholders[1]
77
  title.text = parsed_data['title']
78
+ logger.debug('Presentation title is: %s', title.text)
79
  subtitle.text = 'by Myself and SlideDeck AI :)'
80
  all_headers = [title.text, ]
81
 
 
123
  Flatten a (hierarchical) list of bullet points to a single list containing each item and
124
  its level.
125
 
126
+ :param items: A bullet point (string or list).
127
+ :param level: The current level of hierarchy.
128
+ :return: A list of (bullet item text, hierarchical level) tuples.
129
  """
130
 
131
  flat_list = []