hlydecker committed on
Commit
3360e0e
1 Parent(s): 9313d3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -153
app.py CHANGED
@@ -2,28 +2,40 @@ import gradio as gr
2
  import subprocess
3
  import json
4
  import requests
 
 
 
5
  from bs4 import BeautifulSoup
6
 
7
- """
8
- General helper functions
9
- """
10
 
11
def strip_html_tags(html_text):
    """Return the text content of *html_text* with the markup removed.

    The input is parsed with BeautifulSoup's ``html.parser`` backend and the
    result of ``get_text()`` on the parsed document is returned.
    """
    return BeautifulSoup(html_text, 'html.parser').get_text()
15
 
16
- """
17
- Padlet API Interactions
18
- """
19
- def api_call(input_text):
20
 
21
- #TODO: Refactor to be one function that can get OR post
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  curl_command = [
24
  'curl', '-s', '--request', 'GET',
25
  '--url', f"https://api.padlet.dev/v1/boards/{board_id}?include=posts%2Csections",
26
- '--header', 'X-Api-Key: pdltp_0e380a0de1ff32d77b12dbcc030b1373199b7525681ddc81bd1b9ef3e4e3dd49577a23',
27
  '--header', 'accept: application/vnd.api+json'
28
  ]
29
 
@@ -34,6 +46,7 @@ def api_call(input_text):
34
  # Extract the contents of all posts, stripping HTML tags from bodyHtml
35
  posts_data = response_data.get("included", [])
36
  post_contents = []
 
37
  for post in posts_data:
38
  if post.get("type") == "post":
39
  attributes = post.get("attributes", {}).get("content", {})
@@ -41,23 +54,23 @@ def api_call(input_text):
41
  body_html = attributes.get("bodyHtml", "")
42
 
43
  if subject:
44
- post_content = f"Subject: {subject}"
45
- if body_html:
46
- cleaned_body = strip_html_tags(body_html)
47
- post_content += f"\nBody Text: {cleaned_body}"
48
-
49
- post_contents.append(post_content)
50
-
51
- return "\n\n".join(post_contents) if post_contents else "No post contents found."
52
  except subprocess.CalledProcessError:
53
- return "Error: Unable to fetch data using cURL."
54
 
55
- def create_post(board_id, post_content):
56
 
57
  curl_command = [
58
  'curl', '-s', '--request', 'POST',
59
  '--url', f"https://api.padlet.dev/v1/boards/{board_id}/posts",
60
- '--header', 'X-Api-Key: pdltp_0e380a0de1ff32d77b12dbcc030b1373199b7525681ddc81bd1b9ef3e4e3dd49577a23',
61
  '--header', 'accept: application/vnd.api+json',
62
  '--header', 'content-type: application/vnd.api+json',
63
  '--data',
@@ -66,7 +79,8 @@ def create_post(board_id, post_content):
66
  "type": "post",
67
  "attributes": {
68
  "content": {
69
- "subject": post_content
 
70
  }
71
  }
72
  }
@@ -80,143 +94,66 @@ def create_post(board_id, post_content):
80
  except subprocess.CalledProcessError as e:
81
  return f"Error: Unable to create post - {str(e)}"
82
 
83
- """
84
- LLM Functions
85
- """
86
-
87
- #Streaming endpoint
88
- API_URL = "https://api.openai.com/v1/chat/completions" #os.getenv("API_URL") + "/generate_stream"
89
 
90
- #Inference function
91
def predict(openai_gpt4_key, system_msg, api_result, top_p, temperature, chat_counter, chatbot=None, history=None):
    """Stream a GPT-4 chat completion for ``api_result``.

    Builds a chat payload (optionally led by ``system_msg`` and, on follow-up
    turns, the earlier exchanges in ``chatbot``), POSTs it to ``API_URL`` with
    streaming enabled, and yields once per streamed content fragment.

    Args:
        openai_gpt4_key: OpenAI API key, sent as the bearer token.
        system_msg: Optional system prompt; blank or whitespace-only means none.
        api_result: The user message for this turn.
        top_p: Nucleus-sampling value; only used when ``chat_counter`` is not 0.
        temperature: Sampling temperature; only used when ``chat_counter`` is not 0.
        chat_counter: Number of turns so far; ``0`` selects the first-turn payload.
        chatbot: Earlier ``(user, assistant)`` pairs replayed on follow-up turns.
        history: Flat list of alternating user/assistant messages. A list
            passed by the caller is extended in place.

    Yields:
        ``(chat, history, chat_counter, response)`` where ``chat`` is
        ``history`` regrouped into ``(user, assistant)`` tuples and
        ``response`` is the streaming ``requests`` response object.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between calls and leak one conversation into the next.
    chatbot = [] if chatbot is None else chatbot
    history = [] if history is None else history

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_gpt4_key}",  # Users provide their own OPENAI_API_KEY
    }
    print(f"system message is ^^ {system_msg}")
    if system_msg.strip() == '':
        initial_message = [{"role": "user", "content": f"{api_result}"}]
        multi_turn_message = []
    else:
        initial_message = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": f"{api_result}"},
        ]
        multi_turn_message = [{"role": "system", "content": system_msg}]

    if chat_counter == 0:
        # First turn: sampling parameters are fixed at 1.0, as in the original.
        payload = {
            "model": "gpt-4",
            "messages": initial_message,
            "temperature": 1.0,
            "top_p": 1.0,
            "n": 1,
            "stream": True,
            "presence_penalty": 0,
            "frequency_penalty": 0,
        }
        print(f"chat_counter - {chat_counter}")
    else:
        # Follow-up turn: replay the visible conversation, then add the new message.
        messages = multi_turn_message
        for turn in chatbot:
            messages.append({"role": "user", "content": turn[0]})
            messages.append({"role": "assistant", "content": turn[1]})
        messages.append({"role": "user", "content": api_result})
        payload = {
            "model": "gpt-4",
            "messages": messages,
            "temperature": temperature,
            "top_p": top_p,
            "n": 1,
            "stream": True,
            "presence_penalty": 0,
            "frequency_penalty": 0,
        }

    chat_counter += 1

    history.append(api_result)
    print(f"Logging : payload is - {payload}")
    # POST with stream=True so the reply can be consumed line by line.
    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    print(f"Logging : response code - {response}")
    token_counter = 0
    partial_words = ""

    for line_number, raw_line in enumerate(response.iter_lines()):
        # The first streamed line is skipped, as in the original.
        if line_number == 0:
            continue
        line = raw_line.decode()  # lines arrive as bytes
        # Lines of 12 characters or fewer (blank keep-alives, "data: [DONE]")
        # carry no JSON delta.
        if len(line) <= 12:
            continue
        # Drop the 6-character "data: " prefix and parse the event once.
        delta = json.loads(line[6:])['choices'][0]['delta']
        if "content" not in delta:
            continue
        partial_words = partial_words + delta["content"]
        if token_counter == 0:
            history.append(" " + partial_words)
        else:
            history[-1] = partial_words
        # Regroup the flat history into (user, assistant) tuples.
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history, chat_counter, response
173
-
174
- #Resetting to blank
175
def reset_textbox():
    """Return a Gradio update that sets a component's value to the empty string."""
    cleared = gr.update(value='')
    return cleared
177
-
178
- #to set a component as visible=False
179
def set_visible_false():
    """Return a Gradio update that marks a component as not visible."""
    hidden = gr.update(visible=False)
    return hidden
181
-
182
- #to set a component as visible=True
183
def set_visible_true():
    """Return a Gradio update that marks a component as visible."""
    shown = gr.update(visible=True)
    return shown
185
-
186
- # Create a Gradio Blocks interface
187
- title = "<h1>Padlet Summary Tool</h1>"
188
  # Gradio interface that calls `predict` with five text inputs and shows two
  # text outputs ("Summary" and "API Response"), titled with `title`.
  # NOTE(review): `predict` is declared with six parameters that have no
  # default (key, system message, API result, top_p, temperature,
  # chat_counter) but only five inputs are listed here - confirm the mapping.
  # NOTE(review): `gr.blocks.Textbox` and the `name=` keyword do not look like
  # part of the public Gradio component API - verify against the pinned version.
  iface = gr.Interface(
189
- fn=predict, # Use 'predict' as the function
190
  inputs=[
191
- gr.blocks.Textbox(label="OpenAI GPT4 Key"),
192
- gr.blocks.Textbox(label="System Message"),
193
- gr.blocks.Textbox(label="API Result"),
194
- gr.blocks.Textbox(label="Board ID to Fetch Data From", name="fetch_board_id"),
195
- gr.blocks.Textbox(label="Board ID to Post Summary To", name="post_board_id"),
196
  ],
197
- outputs=[
198
- gr.blocks.Textbox(label="Summary"),
199
- gr.blocks.Textbox(label="API Response"),
200
- ],
201
- title=title,
202
- live=True,
203
  )
204
 
205
- # Add event handlers to call 'create_post' when the "Create Post" button is clicked
206
# Handler that reads the five input values, fetches posts for `fetch_board_id`
# via `api_call`, passes them to `predict`, sends the result to
# `create_post(post_board_id, ...)`, and stores both results in `output`.
# NOTE(review): this `def` reuses the name `create_post`, which an earlier
# function in this file already uses with parameters (board_id, post_content).
# After this definition the call `create_post(post_board_id, summary)` below
# resolves to this decorated name, not to that earlier helper - confirm and
# rename one of them.
# NOTE(review): `predict` contains `yield`, so the call below produces a
# generator object rather than the summary text - confirm how it is meant to
# be consumed.
# NOTE(review): using `gr.blocks.Button(...)` as a decorator does not look
# like a documented Gradio pattern - verify.
- @gr.blocks.Button(label="Create Post")
207
- def create_post(inputs, output):
208
- openai_gpt4_key, system_msg, api_result, fetch_board_id, post_board_id = (
209
- inputs["OpenAI GPT4 Key"],
210
- inputs["System Message"],
211
- inputs["API Result"],
212
- inputs["fetch_board_id"],
213
- inputs["post_board_id"],
214
- )
215
- api_data = api_call(fetch_board_id)
216
- summary = predict(openai_gpt4_key, system_msg, api_data, 1.0, 1.0, 0)
217
- response = create_post(post_board_id, summary)
218
- output["Summary"] = summary
219
- output["API Response"] = response
220
-
221
- # Launch the interface
222
  iface.launch()
 
2
  import subprocess
3
  import json
4
  import requests
5
+ import re
6
+ import pandas as pd
7
+ import openai
8
  from bs4 import BeautifulSoup
9
 
10
+ # Simple function to strip html
 
 
11
 
12
def strip_html_tags(html_text):
    """Return the text content of *html_text* with the markup removed.

    Args:
        html_text: HTML fragment to clean. ``None`` and the empty string are
            accepted and yield ``""``, so a post whose body is missing or
            null does not raise inside the parser.

    Returns:
        The text produced by BeautifulSoup's ``get_text()`` on the parsed
        fragment.
    """
    # Nothing to parse: handing None to BeautifulSoup raises, and an empty
    # document has no text anyway.
    if not html_text:
        return ""
    soup = BeautifulSoup(html_text, 'html.parser')
    return soup.get_text()
16
 
17
def html_posts_to_table(html_posts):
    """Parse "Subject: ..." / "Body Text: ..." formatted text into a table.

    Each ``Subject: `` occurrence starts a new row. The row's body is the
    first ``Body Text: `` line found before the next subject, or ``""`` when
    the post has no body line. Pairing per post keeps both columns the same
    length, and the final line is captured even without a trailing newline.

    Args:
        html_posts: Text containing zero or more posts in the format above.

    Returns:
        A ``pandas.DataFrame`` with columns ``Subject`` and ``Body Text``
        (first line of each field only), one row per subject found.
    """
    # `.` does not cross newlines, so a greedy `(.*)` captures the rest of the
    # line whether or not a newline follows.
    subject_pattern = re.compile(r"Subject: (.*)")
    body_text_pattern = re.compile(r"Body Text: (.*)")

    subject_matches = list(subject_pattern.finditer(html_posts))
    subjects = []
    body_texts = []
    for index, subject_match in enumerate(subject_matches):
        # A post's body must sit between its subject and the next subject.
        if index + 1 < len(subject_matches):
            segment_end = subject_matches[index + 1].start()
        else:
            segment_end = len(html_posts)
        body_match = body_text_pattern.search(
            html_posts, subject_match.end(), segment_end
        )
        subjects.append(subject_match.group(1))
        body_texts.append(body_match.group(1) if body_match else "")

    return pd.DataFrame({'Subject': subjects, 'Body Text': body_texts})
33
+
34
+ def api_call(board_id, api_key):
35
  curl_command = [
36
  'curl', '-s', '--request', 'GET',
37
  '--url', f"https://api.padlet.dev/v1/boards/{board_id}?include=posts%2Csections",
38
+ '--header', f"X-Api-Key: {api_key}",
39
  '--header', 'accept: application/vnd.api+json'
40
  ]
41
 
 
46
  # Extract the contents of all posts, stripping HTML tags from bodyHtml
47
  posts_data = response_data.get("included", [])
48
  post_contents = []
49
+
50
  for post in posts_data:
51
  if post.get("type") == "post":
52
  attributes = post.get("attributes", {}).get("content", {})
 
54
  body_html = attributes.get("bodyHtml", "")
55
 
56
  if subject:
57
+ cleaned_body = strip_html_tags(body_html)
58
+ post_contents.append({"subject": subject, "content": cleaned_body})
59
+
60
+ if post_contents:
61
+ df = pd.DataFrame(post_contents)
62
+ return df
63
+ else:
64
+ return pd.DataFrame({"subject": ["No post contents found."], "content": [""]})
65
  except subprocess.CalledProcessError:
66
+ return pd.DataFrame({"subject": ["Error: Unable to fetch data using cURL."], "content": [""]})
67
 
68
+ def create_post(subject, post_content, board_id, api_key):
69
 
70
  curl_command = [
71
  'curl', '-s', '--request', 'POST',
72
  '--url', f"https://api.padlet.dev/v1/boards/{board_id}/posts",
73
+ '--header', f"X-Api-Key: {api_key}",
74
  '--header', 'accept: application/vnd.api+json',
75
  '--header', 'content-type: application/vnd.api+json',
76
  '--data',
 
79
  "type": "post",
80
  "attributes": {
81
  "content": {
82
+ "subject": subject,
83
+ "body": post_content
84
  }
85
  }
86
  }
 
94
  except subprocess.CalledProcessError as e:
95
  return f"Error: Unable to create post - {str(e)}"
96
 
97
def posts_to_prompt(padlet_posts):
    """Flatten a table of posts into one prompt string.

    Args:
        padlet_posts: DataFrame with ``subject`` and ``content`` columns.

    Returns:
        Each row rendered as ``"<subject> <content>"``, joined with ``", "``.
        An empty table yields ``""``.
    """
    # Joining the zipped columns avoids a row-wise apply, which hands back a
    # DataFrame (with no `.str` accessor) when the table has no rows.
    return ", ".join(
        f"{subject} {content}"
        for subject, content in zip(padlet_posts["subject"], padlet_posts["content"])
    )
 
 
 
100
 
101
def remove_html_tags(text):
    """Return *text* with every ``<...>`` span deleted.

    The match is non-greedy and does not cross line breaks, so a ``<`` whose
    closing ``>`` is on a later line is left untouched.
    """
    return re.sub('<.*?>', '', text)
105
+
106
def summarize_padlet_posts(padlet_posts, openai_api_key,
                           model="gpt-3.5-turbo-instruct",
                           max_tokens=1000, temperature=0.5):
    """Ask an OpenAI completion model for a summary of the given posts.

    Args:
        padlet_posts: DataFrame of posts, flattened with ``posts_to_prompt``.
        openai_api_key: API key passed with the request.
        model: Completion model name. The previously hard-coded
            ``text-davinci-003`` has been retired by OpenAI; the default is
            its documented replacement on the same completions endpoint.
        max_tokens: Upper bound on the length of the generated summary.
        temperature: Sampling temperature for the completion.

    Returns:
        The summary text with leading newlines and ``<...>`` tags removed, or
        a string of the form ``"Error: <message>"`` if the request fails.
    """
    post_prompt = posts_to_prompt(padlet_posts)

    system_prompt = (
        "You are an AI assistant tasked with summarizing the main points of the following Padlet posts. "
        "Please provide a concise summary of the posts based on their content."
    )

    # Instructions first, then the posts on their own line.
    prompt = system_prompt + "\n" + post_prompt

    try:
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            max_tokens=max_tokens,
            api_key=openai_api_key,
            temperature=temperature,
        )
        summary = response.choices[0].text.lstrip('\n')
        return remove_html_tags(summary)
    except Exception as e:
        # UI boundary: callers expect a string, so report the failure as text
        # instead of raising.
        return f"Error: {str(e)}"
135
+
136
def summarize_padlets(input_board_id, output_board_id, padlet_api, openai_api):
    """Summarize one Padlet board and post the summary to another.

    Args:
        input_board_id: Board whose posts are fetched with ``api_call``.
        output_board_id: Board that receives the "Summary" post.
        padlet_api: Padlet API key used for both the fetch and the post.
        openai_api: OpenAI API key used for summarization.

    Returns:
        The summary text, so the interface can display it. If summarization
        fails, the ``"Error: ..."`` string is returned and nothing is posted.
        If posting fails, the summary is returned followed by the posting
        error.
    """
    posts_to_summarize = api_call(input_board_id, padlet_api)

    summary = summarize_padlet_posts(posts_to_summarize, openai_api)

    # summarize_padlet_posts reports failures as an "Error: ..." string; do
    # not publish that to the board as if it were a summary.
    if summary.startswith("Error:"):
        return summary

    post_result = create_post("Summary", summary, output_board_id, padlet_api)

    # create_post reports a failed request as an "Error: ..." string; surface
    # it next to the summary instead of dropping it.
    if isinstance(post_result, str) and post_result.startswith("Error:"):
        return f"{summary}\n\n{post_result}"

    return summary
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
# Gradio form: two board IDs and two API keys in, one text box out.
# Components are created with the top-level `gr.Textbox` class; the
# `gr.inputs` / `gr.outputs` namespaces are deprecated aliases.
iface = gr.Interface(
    fn=summarize_padlets,
    inputs=[
        gr.Textbox(label="Input Board ID"),
        gr.Textbox(label="Output Board ID"),
        # Password-type boxes keep the keys masked on screen.
        gr.Textbox(label="Padlet API Key", type="password"),
        gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk.."),
    ],
    outputs=gr.Textbox(label="Summary"),
    live=False,  # Set to True to show the result without clicking a button
    title="Padlet Summarization",
    description="Summarize Padlet posts and create a summary post on another board using OpenAI GPT3.5.",
)
157
 
158
+ # Run the Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  iface.launch()