barunsaha committed on
Commit
724babe
·
1 Parent(s): 7b01107

Switch to Mistral-7B-Instruct-v0.1; invoke HF API end-point directly

Browse files
app.py CHANGED
@@ -84,26 +84,26 @@ def get_ai_image_wrapper(text: str) -> str:
84
  return llm_helper.get_ai_image(text)
85
 
86
 
87
- def get_disk_used_percentage() -> float:
88
- """
89
- Compute the disk usage.
90
-
91
- :return: Percentage of the disk space currently used
92
- """
93
-
94
- total, used, free = shutil.disk_usage(__file__)
95
- total = total // GB_CONVERTER
96
- used = used // GB_CONVERTER
97
- free = free // GB_CONVERTER
98
- used_perc = 100.0 * used / total
99
-
100
- logging.debug(f'Total: {total} GB\n'
101
- f'Used: {used} GB\n'
102
- f'Free: {free} GB')
103
-
104
- logging.debug('\n'.join(os.listdir()))
105
-
106
- return used_perc
107
 
108
 
109
  def build_ui():
@@ -115,8 +115,11 @@ def build_ui():
115
 
116
  st.title(APP_TEXT['app_name'])
117
  st.subheader(APP_TEXT['caption'])
118
- st.markdown('*Running on GPT-4 at the moment. Image generation has been disabled. '
119
- 'Will run as long as the community plan supports* :)')
 
 
 
120
 
121
  with st.form('my_form'):
122
  # Topic input
@@ -188,6 +191,8 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
188
  try:
189
  # Step 1: Generate the contents in JSON format using an LLM
190
  json_str = process_slides_contents(topic[:target_length], progress_bar)
 
 
191
 
192
  # Step 2: Generate the slide deck based on the template specified
193
  if len(json_str) > 0:
 
84
  return llm_helper.get_ai_image(text)
85
 
86
 
87
+ # def get_disk_used_percentage() -> float:
88
+ # """
89
+ # Compute the disk usage.
90
+ #
91
+ # :return: Percentage of the disk space currently used
92
+ # """
93
+ #
94
+ # total, used, free = shutil.disk_usage(__file__)
95
+ # total = total // GB_CONVERTER
96
+ # used = used // GB_CONVERTER
97
+ # free = free // GB_CONVERTER
98
+ # used_perc = 100.0 * used / total
99
+ #
100
+ # logging.debug(f'Total: {total} GB\n'
101
+ # f'Used: {used} GB\n'
102
+ # f'Free: {free} GB')
103
+ #
104
+ # logging.debug('\n'.join(os.listdir()))
105
+ #
106
+ # return used_perc
107
 
108
 
109
  def build_ui():
 
115
 
116
  st.title(APP_TEXT['app_name'])
117
  st.subheader(APP_TEXT['caption'])
118
+ st.markdown('Using [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1).')
119
+ st.markdown('*If the JSON is generated or parsed incorrectly, try again later by making minor changes '
120
+ 'to the input text.*')
121
+ st.markdown('~~*Running on GPT-4 at the moment. Image generation has been disabled. '
122
+ 'Will run as long as the community plan supports* :)~~')
123
 
124
  with st.form('my_form'):
125
  # Topic input
 
191
  try:
192
  # Step 1: Generate the contents in JSON format using an LLM
193
  json_str = process_slides_contents(topic[:target_length], progress_bar)
194
+ logging.debug(f'{topic[:target_length]=}')
195
+ logging.debug(f'{json_str=}')
196
 
197
  # Step 2: Generate the slide deck based on the template specified
198
  if len(json_str) > 0:
global_config.py CHANGED
@@ -22,11 +22,13 @@ class GlobalConfig:
22
  CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
23
  CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
24
 
25
- # LLM_MODEL_TEMPERATURE: float = 0.5
 
26
  LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
27
  LLM_MODEL_MAX_OUTPUT_LENGTH: int = 2000
28
  LLM_MODEL_MAX_INPUT_LENGTH: int = 300
29
 
 
30
  METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY', '')
31
 
32
  LOG_LEVEL = 'INFO'
 
22
  CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
23
  CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
24
 
25
+ HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1' # 'HuggingFaceH4/zephyr-7b-beta'
26
+ LLM_MODEL_TEMPERATURE: float = 0.2 # Mistral and Zephyr require a bit high temperature
27
  LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
28
  LLM_MODEL_MAX_OUTPUT_LENGTH: int = 2000
29
  LLM_MODEL_MAX_INPUT_LENGTH: int = 300
30
 
31
+ HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')
32
  METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY', '')
33
 
34
  LOG_LEVEL = 'INFO'
langchain_templates/template_combined.txt CHANGED
@@ -1,4 +1,5 @@
1
  You are a helpful, intelligent chatbot. Create the slides for a presentation on the given topic. Include main headings for each slide, detailed bullet points for each slide. Add relevant content to each slide.
 
2
 
3
 
4
  Topic:
@@ -6,7 +7,6 @@ Topic:
6
 
7
 
8
  Desired JSON output format:
9
-
10
  {
11
  "title": "Presentation Title",
12
  "slides": [
@@ -16,7 +16,7 @@ Desired JSON output format:
16
  "First bullet point",
17
  [
18
  "Sub-bullet point 1",
19
- "Sub-bullet point 1"
20
  ],
21
  "Second bullet point"
22
  ]
 
1
  You are a helpful, intelligent chatbot. Create the slides for a presentation on the given topic. Include main headings for each slide, detailed bullet points for each slide. Add relevant content to each slide.
2
+ If relevant, add one or two examples to illustrate the concept.
3
 
4
 
5
  Topic:
 
7
 
8
 
9
  Desired JSON output format:
 
10
  {
11
  "title": "Presentation Title",
12
  "slides": [
 
16
  "First bullet point",
17
  [
18
  "Sub-bullet point 1",
19
+ "Sub-bullet point 2"
20
  ],
21
  "Second bullet point"
22
  ]
llm_helper.py CHANGED
@@ -7,17 +7,20 @@ from langchain.llms import Clarifai
7
  from global_config import GlobalConfig
8
 
9
 
 
 
 
10
  logging.basicConfig(
11
  level=GlobalConfig.LOG_LEVEL,
12
  format='%(asctime)s - %(message)s',
13
  )
14
 
15
- llm = None
16
 
17
 
18
  def get_llm(use_gpt: bool) -> Clarifai:
19
  """
20
- Get a large language model.
21
 
22
  :param use_gpt: True if GPT-3.5 is required; False is Llama 2 is required
23
  """
@@ -45,28 +48,58 @@ def get_llm(use_gpt: bool) -> Clarifai:
45
  return _
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def generate_slides_content(topic: str) -> str:
49
  """
50
  Generate the outline/contents of slides for a presentation on a given topic.
51
 
52
- :param topic: Topic/subject matter/idea on which slides are to be generated
53
  :return: The content in JSON format
54
  """
55
 
56
- # global prompt
57
- global llm
58
-
59
  with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
60
  template_txt = in_file.read().strip()
61
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
62
 
63
- if llm is None:
64
- llm = get_llm(use_gpt=True)
65
- print(llm)
66
-
67
- slides_content = llm(template_txt, verbose=True)
68
-
69
- return slides_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  def get_ai_image(text: str) -> str:
 
7
  from global_config import GlobalConfig
8
 
9
 
10
+ HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
11
+ HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
12
+
13
  logging.basicConfig(
14
  level=GlobalConfig.LOG_LEVEL,
15
  format='%(asctime)s - %(message)s',
16
  )
17
 
18
+ # llm = None
19
 
20
 
21
  def get_llm(use_gpt: bool) -> Clarifai:
22
  """
23
+ Get a large language model (hosted by Clarifai).
24
 
25
  :param use_gpt: True if GPT-3.5 is required; False is Llama 2 is required
26
  """
 
48
  return _
49
 
50
 
51
def hf_api_query(payload: dict):
    """
    Send a request to the Hugging Face inference API end-point.

    :param payload: The prompt for the LLM and related parameters
    :return: The JSON-decoded body of the API response
    """

    logging.debug(f'{payload=}')

    # POST the payload as JSON to the model URL, sending the module-level
    # authorization headers along with it.
    # NOTE(review): no timeout is passed here, so the call waits for as long
    # as the server keeps the connection open -- confirm this is intended.
    api_response = requests.post(
        url=HF_API_URL,
        json=payload,
        headers=HF_API_HEADERS,
    )
    decoded = api_response.json()

    return decoded
62
+
63
+
64
  def generate_slides_content(topic: str) -> str:
65
  """
66
  Generate the outline/contents of slides for a presentation on a given topic.
67
 
68
+ :param topic: Topic on which slides are to be generated
69
  :return: The content in JSON format
70
  """
71
 
 
 
 
72
  with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
73
  template_txt = in_file.read().strip()
74
  template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
75
 
76
+ output = hf_api_query({
77
+ "inputs": template_txt,
78
+ "parameters": {
79
+ 'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
80
+ 'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
81
+ 'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
82
+ 'max_new_tokens': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
83
+ 'num_return_sequences': 1,
84
+ 'return_full_text': False,
85
+ # "repetition_penalty": 0.0001
86
+ },
87
+ "options": {
88
+ 'wait_for_model': True,
89
+ 'use_cache': True
90
+ }
91
+ })
92
+
93
+ output = output[0]['generated_text'].strip()
94
+ # output = output[len(template_txt):]
95
+ logging.debug(f'{output=}')
96
+
97
+ json_end_idx = output.rfind('```')
98
+ if json_end_idx != -1:
99
+ # logging.debug(f'{json_end_idx=}')
100
+ output = output[:json_end_idx]
101
+
102
+ return output
103
 
104
 
105
  def get_ai_image(text: str) -> str: