budhadityac24 committed on
Commit
5b8d529
1 Parent(s): 6aed8e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -8
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  import os
3
  import json
4
  from pypdf import PdfReader
@@ -65,7 +66,7 @@ def obsjsoncreate(json_template,text,ogtext):
65
  }
66
  ],
67
  temperature=0.21,
68
- max_tokens=2048,
69
  top_p=1,
70
  stream=True,
71
  stop=None,
@@ -87,7 +88,7 @@ def obsjsoncreate(json_template,text,ogtext):
87
  }
88
  ],
89
  temperature=0.21,
90
- max_tokens=2048,
91
  top_p=1,
92
  stream=True,
93
  stop=None,
@@ -112,7 +113,7 @@ def bizobjjsoncreate(json_template,text):
112
  }
113
  ],
114
  temperature=0.21,
115
- max_tokens=2048,
116
  top_p=1,
117
  stream=True,
118
  stop=None,
@@ -155,7 +156,7 @@ def question_create(json_template):
155
  messages=[
156
  {
157
  "role": "system",
158
- "content": "You are an experienced writer. You will be given an array of questions. \nSome questions will ask to upload images. Ignore any of these type of questions.\nSome questions ask about different identities or descriptions of the same thing. I want you o merge the questions so as to ask input from them once.\nConvert all questions so that more of a professional but also a bit of a funny tone is maintained. \nRETURN AN ARRAY OF THE QUESTIONS ONLY. DO NOT RETURN ANYTHING ELSE. "
159
  },
160
  {
161
  "role": "user",
@@ -163,7 +164,7 @@ def question_create(json_template):
163
  }
164
  ],
165
  temperature=0.73,
166
- max_tokens=5840,
167
  top_p=1,
168
  stream=True,
169
  stop=None,
@@ -207,7 +208,7 @@ def answer_refill(questions,answers,obs_json_template,bizobj_json_template):
207
  messages=[
208
  {
209
  "role": "system",
210
- "content": "You are a helpful assistant. You will be given a Question-answer pair. You will be given a json. Some subproperties in the JSONs labelled \"User Answer\" are marked as TBD. Based on the question answer pair, I want you to fill the Answer of the question answer pair as it is into the \"User answer\" subproperty. Make sure you return the full JSON, without missing any field. Then return the final completely filled JSONs. DONT OUTPUT ANYTHING OTHER THAN THE JSONS."
211
  },
212
  {
213
  "role": "user",
@@ -215,7 +216,7 @@ def answer_refill(questions,answers,obs_json_template,bizobj_json_template):
215
  }
216
  ],
217
  temperature=1,
218
- max_tokens=7610,
219
  top_p=1,
220
  stream=True,
221
  stop=None,
@@ -254,7 +255,89 @@ def executive_summary(json_template):
254
  final_summ+=chunk.choices[0].delta.content or ""
255
  return final_summ
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  def main():
259
  st.title("Qualitas Sales Data Collection Chatbot")
260
  st.caption("Welcome to the Qualitas Bot. First upload a PDF document which should be customer correspondence, detailing some requirements. Also sometimes the Submit button for the questions is a bit sticky. So You might have to click it twice!")
@@ -303,7 +386,18 @@ def process_document(uploaded_file):
303
  st.session_state.bizobj = final_bizobj_json
304
  questionobs = question_create(final_obs_json)
305
  questionbizobj = question_create(final_bizobj_json)
306
- st.session_state.questions = ast.literal_eval(questionbizobj) + ast.literal_eval(questionobs)
 
 
 
 
 
 
 
 
 
 
 
307
  st.write(st.session_state.questions)
308
  # Mark file as processed
309
  st.session_state.file_processed = True
@@ -350,6 +444,8 @@ def show_question():
350
  # st.write(answers)
351
 
352
  completed_json = answer_refill(st.session_state.questions, answers, st.session_state.obs, st.session_state.bizobj)
 
 
353
  exec_summ = executive_summary(completed_json)
354
  st.write(exec_summ)
355
 
 
1
  import streamlit as st
2
+ import requests
3
  import os
4
  import json
5
  from pypdf import PdfReader
 
66
  }
67
  ],
68
  temperature=0.21,
69
+ max_tokens=8000,
70
  top_p=1,
71
  stream=True,
72
  stop=None,
 
88
  }
89
  ],
90
  temperature=0.21,
91
+ max_tokens=8000,
92
  top_p=1,
93
  stream=True,
94
  stop=None,
 
113
  }
114
  ],
115
  temperature=0.21,
116
+ max_tokens=8000,
117
  top_p=1,
118
  stream=True,
119
  stop=None,
 
156
  messages=[
157
  {
158
  "role": "system",
159
+ "content": "You are an experienced writer. You will be given an array of questions. \nSome questions will ask to upload images. Ignore any of these type of questions.\nSome questions ask about different identities or descriptions of the same thing. I want you to merge the questions so as to ask input from them once.\nConvert all questions so that more of a professional but also a bit of a funny tone is maintained. AVOID REDUNDANCY. DONT RETURN MORE THAN 15 QUESTIONS.\nRETURN AN ARRAY OF THE QUESTIONS ONLY. DO NOT RETURN ANYTHING ELSE. "
160
  },
161
  {
162
  "role": "user",
 
164
  }
165
  ],
166
  temperature=0.73,
167
+ max_tokens=2240,
168
  top_p=1,
169
  stream=True,
170
  stop=None,
 
208
  messages=[
209
  {
210
  "role": "system",
211
+ "content": "You are a helpful assistant. You will be given a Question-answer pair. You will be given a json. Some subproperties in the JSONs labelled \"User Answer\" are marked as TBD. Based on the question answer pair, I want you to fill the Answer of the question answer pair as it is into the \"User answer\" subproperty. Make sure you return the full JSON, without missing any field. After filling, merge the two filled JSONs. Then return the final completely filled JSON. DONT OUTPUT ANYTHING OTHER THAN THE JSONS."
212
  },
213
  {
214
  "role": "user",
 
216
  }
217
  ],
218
  temperature=1,
219
+ max_tokens=8000,
220
  top_p=1,
221
  stream=True,
222
  stop=None,
 
255
  final_summ+=chunk.choices[0].delta.content or ""
256
  return final_summ
257
 
258
def chunk_data(data, chunk_size=10):
    """Split a mapping, or a list of key/value pairs, into dicts of bounded size.

    Args:
        data: A dict, or a list whose items are ``(key, value)`` pairs. Every
            slice of items is handed to ``dict()``, so a list holding anything
            other than pairs is rejected by ``dict()`` itself.
        chunk_size: Maximum number of entries per returned dict. Must be at
            least 1.

    Returns:
        A list of dicts in input order, each with at most ``chunk_size``
        entries. Empty input yields an empty list.

    Raises:
        TypeError: If ``data`` is neither a dict nor a list.
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if isinstance(data, dict):
        # Work on the (key, value) pairs so they can be sliced by position.
        items = list(data.items())
    elif isinstance(data, list):
        items = data
    else:
        raise TypeError("Data must be either a dictionary or a list")

    # A negative step makes range() empty, which would silently drop every
    # item; a zero step only surfaces as an opaque range() error. Fail loudly.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size!r}")

    return [
        dict(items[start:start + chunk_size])
        for start in range(0, len(items), chunk_size)
    ]
268
+
269
# Column names shared by the restructuring prompt and the Airtable table.
_AIRTABLE_FIELDS = ("Category", "Sub-category", "Description", "User Answer")


def _parse_json_reply(raw):
    """Parse a model reply as JSON, tolerating a surrounding Markdown code fence."""
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (it may carry a language tag such as
        # "json") and everything from the closing fence onwards.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


def _extract_records(parsed):
    """Return the list of row dicts held by the parsed model output."""
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        # A lone row that was not wrapped in an array.
        if any(name in parsed for name in _AIRTABLE_FIELDS):
            return [parsed]
        # A wrapper object such as {"records": [...]}.
        nested = [value for value in parsed.values() if isinstance(value, list)]
        if len(nested) == 1:
            return nested[0]
    raise ValueError("Model output does not contain a list of records")


def airtable_write(json_template):
    """Restructure a filled JSON via Groq and upload the rows to Airtable.

    The JSON text is sent to the chat model with instructions to reshape it
    into rows with the fields "Category", "Sub-category", "Description" and
    "User Answer". The parsed reply is saved to ``groq_json.json`` in the
    current working directory and then posted to Airtable in batches of 10.

    Args:
        json_template: The JSON text passed to the model as the user message.

    Returns:
        None. The outcome of each batch is reported with ``print``.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
        ValueError: If the parsed reply holds no recognisable list of rows.
    """
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    # Groq inference
    completion = client.chat.completions.create(
        model="llama-3.1-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant. You will be given a unstructured JSON. I want you to convert it into a fully structured JSON which will become a structured CSV. The headings of the CSV are to be \\\"Category\\\",\\\"Sub-category\\\",\\\"Description\\\" and \\\"User Answer\\\". So shuffle around the fields accordingly. \nFields marked \"Category\" are to be directly picked as the \"Category\" for the CSV. If there is \"Observation type\", then that becomes the Category. \nDONT LEAVE ANY FIELD. MAKE SURE ALL FIELDS ARE INCLUDED IN THE RESULT. DONT OUTPUT ANYTHING OTHER THAN THE JSON. ONLY OUTPUT THE JSON.\n",
            },
            {
                "role": "user",
                "content": json_template,
            },
        ],
        temperature=0.25,
        max_tokens=8000,
        top_p=1,
        stream=True,
        stop=None,
    )
    content = "".join(chunk.choices[0].delta.content or "" for chunk in completion)

    # The reply is requested as free text (no JSON response_format), so it may
    # arrive wrapped in a Markdown fence; strip that before parsing.
    groq_json = _parse_json_reply(content)
    with open("groq_json.json", "w", encoding="utf-8") as file:
        json.dump(groq_json, file, indent=4)

    records = _extract_records(groq_json)

    API_KEY = os.getenv("AIRTABLE_KEY")
    BASE_ID = 'appcl0egQeE4pP5ID'
    TABLE_ID = 'tbl2AaOSxyBv6ObR5'
    url = f'https://api.airtable.com/v0/{BASE_ID}/{TABLE_ID}'

    headers = {
        'Authorization': f'Bearer {API_KEY}',
        'Content-Type': 'application/json'
    }

    batch_size = 10  # Records are sent to Airtable ten at a time.
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        # Format the current batch for the Airtable API. A field the model
        # left out is sent blank instead of aborting the whole upload.
        airtable_data = {
            "records": [
                {"fields": {name: item.get(name, "") for name in _AIRTABLE_FIELDS}}
                for item in batch
            ]
        }

        try:
            # The timeout keeps a stalled connection from hanging the app.
            response = requests.post(url, headers=headers, json=airtable_data, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to add batch. Request error: {exc}")
            continue

        # Check if the request was successful
        if response.status_code == 200:
            print(f"Batch of {len(batch)} records added successfully!")
        else:
            print(f"Failed to add batch. Status code: {response.status_code}, Error: {response.text}")
339
+
340
+
341
  def main():
342
  st.title("Qualitas Sales Data Collection Chatbot")
343
  st.caption("Welcome to the Qualitas Bot. First upload a PDF document which should be customer correspondence, detailing some requirements. Also sometimes the Submit button for the questions is a bit sticky. So You might have to click it twice!")
 
386
  st.session_state.bizobj = final_bizobj_json
387
  questionobs = question_create(final_obs_json)
388
  questionbizobj = question_create(final_bizobj_json)
389
+ while True:
390
+ try:
391
+ # Attempt to evaluate the expressions and assign them to session state
392
+ st.session_state.questions = ast.literal_eval(questionbizobj) + ast.literal_eval(questionobs)
393
+ # If successful, break out of the loop
394
+ break
395
+ except Exception as e:
396
+ # Print the error for debugging purposes (optional)
397
+ print(f"An error occurred: {e}")
398
+ # Wait for 1 second before trying again
399
+ time.sleep(1)
400
+ continue
401
  st.write(st.session_state.questions)
402
  # Mark file as processed
403
  st.session_state.file_processed = True
 
444
  # st.write(answers)
445
 
446
  completed_json = answer_refill(st.session_state.questions, answers, st.session_state.obs, st.session_state.bizobj)
447
+ # st.write(completed_json)
448
+ airtable_write(completed_json)
449
  exec_summ = executive_summary(completed_json)
450
  st.write(exec_summ)
451