Update extract_insights.py
Browse files- extract_insights.py +103 -45
extract_insights.py
CHANGED
|
@@ -200,52 +200,110 @@ def adjust_prompt_tokens_v1(prompt: str) -> str:
|
|
| 200 |
return trimmed_text
|
| 201 |
|
| 202 |
|
| 203 |
-
async def process_synchronous_job(
|
| 204 |
"""
|
| 205 |
Background task to process the batch job
|
| 206 |
"""
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
return trimmed_text
|
| 201 |
|
| 202 |
|
| 203 |
+
async def process_synchronous_job(dataset: Dict[str, Any]):
    """
    Background task to process the batch job.

    For every record found under ``dataset['data']`` this:

    1. builds a prompt from the record's ``receipt_text`` via
       ``receipt_radar_prompt`` and ``adjust_prompt_tokens_v1``;
    2. requests a JSON-object completion from the ``gpt-4o-mini`` chat model;
    3. parses the completion, attaches ``email``, ``user_id`` and
       ``message_id``, passes ``total_cost`` through ``parse_number`` and
       inserts the result into ``receipt_radar_structured_data_duplicate``;
    4. sets ``status`` to ``"processing completed"`` on the
       ``receipt_ocr_data`` row matching the same message_id/user_id/email.

    Args:
        dataset: Payload whose ``'data'`` entry is an iterable of per-receipt
            dicts; the keys read from each are ``user_id``, ``message_id``,
            ``receipt_text`` and ``email``. A missing, ``None`` or empty
            ``'data'`` entry is treated as "nothing to process".

    Returns:
        None. A failure while handling one record is printed and the loop
        moves on to the next record.
    """
    # dict.get returns None when 'data' is absent; iterating None would raise
    # TypeError before any record is handled, so fall back to an empty list.
    records = dataset.get('data') or []

    for ds in records:
        user_id = ds.get('user_id')
        message_id = ds.get('message_id')
        raw_text = ds.get('receipt_text')
        email = ds.get('email')

        try:
            prompt = adjust_prompt_tokens_v1(receipt_radar_prompt(raw_text))

            # NOTE(review): this call and the supabase .execute() calls below
            # are not awaited; if these clients are synchronous they block the
            # event loop while this coroutine runs - confirm client types.
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                response_format={"type": "json_object"},
                temperature=0.1
            )
            print("Printing GPT response")
            print(completion.choices[0].message)

            # inserting data into supabase
            insert_data = json.loads(completion.choices[0].message.content)
            insert_data['email'] = email
            insert_data['user_id'] = user_id
            insert_data['message_id'] = message_id
            # A completion without 'total_cost' raises KeyError here, which the
            # handler below reports; the record is then skipped.
            insert_data['total_cost'] = parse_number(insert_data['total_cost'])
            print("Printing user_id")
            print(user_id)

            (
                supabase.table("receipt_radar_structured_data_duplicate")
                .insert(insert_data)
                .execute()
            )

            # Only reached when the insert above did not raise.
            update_status_response = (
                supabase.table("receipt_ocr_data")
                .update({"status": "processing completed"})
                .eq("message_id", message_id)
                .eq("user_id", user_id)
                .eq("email", email)
                .execute()
            )

            print(update_status_response)
        except Exception as e:
            # Deliberate best-effort: one bad record must not abort the batch.
            print(f"Error occurred during processing: {e}")
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# try:
|
| 267 |
+
# prompt = receipt_radar_prompt(raw_text)
|
| 268 |
+
|
| 269 |
+
# completion = client.chat.completions.create(
|
| 270 |
+
# model="gpt-4o-mini",
|
| 271 |
+
# messages=[
|
| 272 |
+
# {
|
| 273 |
+
# "role": "user",
|
| 274 |
+
# "content": prompt
|
| 275 |
+
# }
|
| 276 |
+
# ],
|
| 277 |
+
# response_format={"type": "json_object"},
|
| 278 |
+
# temperature=0.1
|
| 279 |
+
# )
|
| 280 |
+
# print("Printing GPT response")
|
| 281 |
+
# print(completion.choices[0].message)
|
| 282 |
+
|
| 283 |
+
# # inserting data into supabase
|
| 284 |
+
# insert_data = json.loads(completion.choices[0].message.content)
|
| 285 |
+
# insert_data['email'] = email
|
| 286 |
+
# insert_data['user_id'] = user_id
|
| 287 |
+
# insert_data['message_id'] = message_id
|
| 288 |
+
# insert_data['total_cost'] = parse_number(insert_data['total_cost'])
|
| 289 |
+
# print("Printing user_id")
|
| 290 |
+
# print(user_id)
|
| 291 |
+
|
| 292 |
+
# insert_response = (
|
| 293 |
+
# supabase.table("receipt_radar_structured_data_duplicate")
|
| 294 |
+
# .insert(insert_data)
|
| 295 |
+
# .execute()
|
| 296 |
+
# )
|
| 297 |
+
|
| 298 |
+
# update_status_response = (
|
| 299 |
+
# supabase.table("receipt_ocr_data")
|
| 300 |
+
# .update({"status": "processing completed"})
|
| 301 |
+
# .eq("message_id", message_id)
|
| 302 |
+
# .eq("user_id", user_id)
|
| 303 |
+
# .eq("email", email)
|
| 304 |
+
# .execute()
|
| 305 |
+
# )
|
| 306 |
+
|
| 307 |
+
# print(update_status_response)
|
| 308 |
+
# except Exception as e:
|
| 309 |
+
# print(f"Error occurred during processing: {e}")
|