Redmind committed · Commit 4213a59 · verified · 1 Parent(s): c5fe17e

Update app.py

Files changed (1):
  1. app.py +51 -63
app.py CHANGED
@@ -44,7 +44,7 @@ from datetime import datetime
 from sqlalchemy import create_engine
 from sqlalchemy.sql import text
 import openai
-
+import logging
 # pandas
 import pandas as pd
 from pandasai.llm.openai import OpenAI
@@ -76,6 +76,9 @@ from huggingface_hub import HfApi, HfFolder
 import os
 
 import zipfile
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename='app.log', filemode='a')
+
 # Get the current working directory
 current_folder = os.getcwd()
 #Variables Initialization
@@ -186,12 +189,12 @@ def generate_sql_query(question):
 def run_query(query):
     # Clean the query by removing markdown symbols and trimming whitespace
     clean_query = query.replace("```sql", "").replace("```", "").strip()
-    print(f"Executing SQL Query: {clean_query}")
+    logging.info(f"Executing SQL Query: {clean_query}")
     try:
         result = db.run(clean_query)
         return result
     except Exception as e:
-        print(f"Error executing query: {e}")
+        logging.info(f"Error executing query: {e}")
         return None
 
 
@@ -205,12 +208,11 @@ def database_tool(question):
 
 def get_ASN_data(question):
     base_url = os.getenv("ASN_API_URL")
-    print(f"base_url{base_url}")
+
     complete_url = f"{base_url}branchMaster.id=343&transactionUid={question}&userId=164&transactionType=ASN"
     try:
         response = requests.get(complete_url)
-        print(f"complete_url{complete_url}")
-        print(f"response{response}")
+
         data = response.json()
         response.raise_for_status()
 
@@ -245,10 +247,10 @@ def get_ASN_data(question):
             return "ASN Details are not found. Please contact system administrator."
 
     except requests.exceptions.HTTPError as http_err:
-        print(f"HTTP error occurred: {http_err}")
+        logging.info(f"HTTP error occurred: {http_err}")
         return "Sorry, we encountered an error while processing your request. Please try after some time."
     except Exception as err:
-        print(f"An error occurred: {err}")
+        logging.info(f"An error occurred: {err}")
         return "Sorry, we encountered an error while processing your request. Please try after some time."
 
 def load_and_split_pdf(pdf_path):
@@ -271,7 +273,7 @@ def query_vector_store(vector_store, query, config=None):
     if config:
         print("Config passed:", config)
     docs = vector_store.similarity_search(query, k=5)
-    print(f"Vector store return: {docs}")
+    logging.info(f"Vector store return: {docs}")
     return docs
 
 
@@ -296,12 +298,12 @@ texts = load_and_split_pdf(pdf_path)
 vector_store = create_vector_store(texts)
 
 def document_data_tool_runtime(question):
-    print(f"Document data runtime tool enter: {question} with {vector_store1}")
+    logging.info(f"Document data runtime tool enter: {question} with {vector_store1}")
     query_response = query_vector_store(vector_store1, question, config={"callbacks": [langfuse_handler]})
     return query_response
 
 def document_data_tool(question):
-    print(f"Document data tool enter: {question}")
+    logging.info(f"Document data tool enter: {question}")
     # query_string = question['tags'][0] if 'tags' in question and question['tags'] else ""
     query_response = query_vector_store(vector_store, question, config={"callbacks": [langfuse_handler]})
     # summarized_response = summarize_document(query_response)
@@ -350,9 +352,9 @@ def send_email_with_attachment_mailjet(recipient_email, subject, body, attach_im
 
     # Check if the email was sent successfully
     if result.status_code == 200:
-        print("Email sent successfully with attachment!")
+        logging.info("Email sent successfully with attachment!")
     else:
-        print(f"Failed to send email. Status code: {result.status_code}")
+        logging.info(f"Failed to send email. Status code: {result.status_code}")
         print(result.json())
 
 
@@ -393,7 +395,7 @@ def send_email_with_attachment(recipient_email, subject, body, attachment_path):
         server.quit()
 
     except Exception as error:
-        print(f"An error occurred: {error}")
+        logging.info(f"An error occurred: {error}")
 
     # return 1
 
@@ -402,16 +404,14 @@ def make_api_request(url, params):
     """Generic function to make API GET requests and return JSON data."""
     try:
         response = requests.get(url, params=params)
-        print("url:",url)
-        print("params:",params)
-        print("response:",response)
-        print(response.status_code)
+
+
         response.raise_for_status()  # Raises an HTTPError if the response was an error
         return response.json()  # Return the parsed JSON data
     except requests.exceptions.HTTPError as http_err:
-        print(f"HTTP error occurred: {http_err}")
+        logging.info(f"HTTP error occurred: {http_err}")
     except Exception as err:
-        print(f"An error occurred: {err}")
+        logging.info(f"An error occurred: {err}")
 
 
 def inventory_report(question):
@@ -421,19 +421,19 @@ def inventory_report(question):
         warehouse_name = parts[0].strip()
         user_question = parts[1].strip()
         user_email = parts[2].strip() if len(parts) > 2 else None
-        print(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}")
+        logging.info(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}")
     else:
        return "warehouse name not found"
-    print(f"warehouse name: {warehouse_name}")
+
    apis[0]["params"]["query"] = warehouse_name
-    print(apis[0]["params"])
+
    data = make_api_request(apis[0]["url"], apis[0]["params"])
-    print("Warehouse data:",data)
+
    if data:

        # Extracting the id for the warehouse with the name "WH"
        warehouse_id = next((item['id'] for item in data['result'] if item['name'] == warehouse_name), None)
-        print(f"warehouse_id:{warehouse_id}")
+
        if warehouse_id:
            print("before api id")
            # Step 3: Update the placeholder with the actual warehouse_id
@@ -442,10 +442,9 @@ def inventory_report(question):
                 if "warehouseId" in api["params"]:
                     api["params"]["warehouseId"] = warehouse_id
 
-            print("after api id")
-            print(apis)
+
             data1 = make_api_request(apis[1]["url"], apis[1]["params"])
-            print(data1)
+
             if data1:
                 headers = ["S.No", "Warehouse Code", "Warehouse Name", "Customer Code", "Customer Name", "Item Code", "Item Name",
                            "Currency", "EAN", "UOM", "Quantity", "Gross Weight", "Volume", "Total Value"]
@@ -475,13 +474,13 @@ def inventory_report(question):
                         ]
                         table_data.append(row)
                     except KeyError as e:
-                        print(f"Missing Key Error for item: {item} - {e}")
+                        logging.info(f"Missing Key Error for item: {item} - {e}")
                 else:
-                    print("Content is either missing or not a list:", result.get('content'))
+                    logging.info("Content is either missing or not a list:", result.get('content'))
             else:
-                print("Result key is missing or not a dictionary in data1:", data1.get('result'))
+                logging.info("Result key is missing or not a dictionary in data1:", data1.get('result'))
 
-            print("No data available in 'content'.")
+            logging.info("No data available in 'content'.")
             return "There are no inventory details for the warehouse you have given."
 
     # Convert to pandas DataFrame
@@ -584,7 +583,7 @@ def clean_gradio_tmp_dir():
     try:
         shutil.rmtree(tmp_dir)
     except Exception as e:
-        print(f"Error cleaning up /tmp/gradio/ directory: {e}")
+        logging.info(f"Error cleaning up /tmp/gradio/ directory: {e}")
 
 
 # Define the interface function
@@ -786,7 +785,7 @@ def answer_question_thread(user_question, chatbot,audio=None):
     # return response_text
 
 
-# without forceful stop option
+# without forceful stop option. not using this function block since we need stop button
 def answer_question(user_question, chatbot, audio=None):
 
     global iterations
@@ -813,10 +812,12 @@ def answer_question(user_question, chatbot, audio=None):
             audio_content = recognizer.record(source)
         try:
             user_question = recognizer.recognize_google(audio_content)
-        except sr.UnknownValueError:
+        except sr.UnknownValueError as e:
             user_question = "Sorry, I could not understand the audio."
+            logging.info((f"Issue with understanding audio {e}"))
-        except sr.RequestError:
+        except sr.RequestError as e:
             user_question = "Could not request results from Google Speech Recognition service."
+            logging.info((f"Could not request results from Google Speech Recognition service. {e}"))
 
     while iterations < max_iterations:
 
@@ -863,7 +864,7 @@ def answer_question(user_question, chatbot, audio=None):
             # attachment_path=chart_path
             attachment_path=img_str)
 
-        # Send email with the chart image attached
+        # Send email with the chart image attached. SMTP is not working in HF spaces
         """send_email_with_attachment(
             recipient_email=user_email,
             subject="Warehouse Inventory Report",
@@ -876,9 +877,9 @@ def answer_question(user_question, chatbot, audio=None):
         try:
             os.remove(img)  # Clean up the temporary image file
         except Exception as e:
-            print(f"Error cleaning up image file: {e}")
+            logging.info(f"Error cleaning up image file: {e}")
     except Exception as e:
-        print(f"Error loading image file: {e}")
+        logging.info(f"Error loading image file: {e}")
         chatbot.append((user_question, "Chart generation failed. Please try again."))
         return gr.update(value=chatbot)
 
@@ -913,7 +914,7 @@ def save_feedback(username, user_question, user_response, feedback):
         cursor.execute(query, (username, user_question, user_response, feedback))
         conn.commit()
     except mysql.connector.Error as err:
-        print(f"Error: {err}")
+        logging.info(f"Error: {err}")
     finally:
         if cursor:
             cursor.close()
@@ -1045,15 +1046,11 @@ def generate_prompt_from_excel_file(df_dict):
         # Add sheet details to the prompt
         prompt += f"For the sheet '{sheet_name}', the column headers are:"
         prompt += f"{', '.join(column_headers)}\n\n"
-        #prompt += f"Example data from sheet '{sheet_name}':\n"
-        #prompt += f"{sample_data}\n\n"
+
 
         # Generate example natural language questions based on columns
         example_questions = generate_example_questions(sheet_name, column_headers)
-        #prompt += "### Example Questions:\n"
-        #for question in example_questions:
-        #    prompt += f"- {question}\n"
-        #prompt += "\n"
+
 
     # Finalize the prompt with function call description
     prompt += f"- Query: A natural language question (e.g., List the distinct broker names). The question should be sent as 'What are the distinct broker names in the excel document.'."
@@ -1249,7 +1246,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
     # List of required columns
     required_columns = ['BROKER', 'ACCOUNT NUMBER', 'EMPLOYEE NAME', 'ACCOUNT NAME', 'ACCOUNT ID']
     # Filter the DataFrame to include only the required columns
-    #print(df[required_columns])
+
     #limited_data = df[required_columns]
     limited_data11 = df.head(3)
     limited_data = limited_data11[required_columns]
@@ -1266,25 +1263,19 @@ def handle_large_dataset(df, create_document,isDataFrame):
 
     limited_data_without_first_column.insert(0, 'SNo', range(1, len(limited_data) + 1))
     # 3. Save the full dataset to a downloadable file
-
-
-
-
+
     file_path = "output_data.xlsx"
     #file_path = os.path.join(current_folder, 'output_data.xlsx')
     #Broker Name, Account Number, Employee name,Account Owner,Account ID
     df.to_excel(file_path, index=False)
-
-
 
-
     global user_name
     # Get today's date and current time
     now = datetime.now()
 
     # Format the date and time as 'YYYY-MM-DD HH:MM'
     formatted_date_time = now.strftime("%Y-%m-%d %H:%M")
-    print(formatted_date_time)
+
     directory = user_name + "/" + formatted_date_time
     create_file_HF(file_path, directory,False)
     dataset_link = get_download_link(directory,file_path)
@@ -1296,7 +1287,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
 
     #columns = list(df.columns)
     sample_table = limited_data_without_first_column.to_markdown()
-    #print(sample_table)
+
     if create_document:
         #Logic to generate pdfs with employee name and account number
         for index, row in df.iterrows():
@@ -1313,8 +1304,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
         link = get_download_link(directory,zip_file_name)
         print(f"downloadable link: {link}")
         docstatus = f"""Please download <a href="{dataset_link}" download>excel</a> and <a href="{link}" download>PDFs</a>."""
-        print(docstatus)
-        print(sample_table)
+
     # 5. Return the summary and downloadable link
     #return f"""
     #There are a total of {total_rows} rows. Please download the complete dataset here: <a href="https://redmindtechnologies.com/RedMindGPT/output.xlsx" download>Download</a>. Here are the first 3 rows:
@@ -1337,8 +1327,6 @@ def create_file_HF(file_path,directory,document_created = False):
         directory = directory + "/" + file_path
     else:
         directory = directory + "/" + file_path
-
-
 
     api.upload_file(path_or_fileobj=file_path, repo_id=repo_id,revision = branch, repo_type= "space", path_in_repo=directory)
 
@@ -1376,7 +1364,7 @@ def create_pdf(cname,ename,account_number, directory):
 
     # Get the absolute path
     file_path = os.path.abspath(output_file_name)
-    print(f"The file was created at: {file_path}")
+    logging.info(f"The file was created at: {file_path}")
     #create_file_HF(output_file_name, directory,document_created)
 
 
@@ -1393,7 +1381,7 @@ def zip_files_in_folder(directory_output, output_zip):
             if file.endswith(".pdf") and os.path.isfile(file_path):
                 # Add file to the ZIP archive
                 zipf.write(file_path, file)  # Save with its filename in the ZIP
-    print(f"ZIP file created: {output_zip}, {output_zip}, {directory_output}")
+    logging.info(f"ZIP file created: {output_zip}, {output_zip}, {directory_output}")
 
 def directory_exists(repo_id, directory, token):
     try:
@@ -1406,7 +1394,7 @@ def directory_exists(repo_id, directory, token):
         # Check if any file starts with the directory path
         return any(file.startswith(directory) for file in files)
     except Exception as e:
-        print(f"Error checking directory existence: {e}")
+        logging.info(f"Error checking directory existence: {e}")
         return False
 
 def get_download_link(file_path,file_name):
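
For reference, the pattern this commit applies throughout app.py is a print-to-logging migration: configure the root logger once at import time, then route diagnostics through logging.info so they land in app.log instead of stdout. A minimal standalone sketch of that pattern follows; the db argument is a stand-in for app.py's database handle, and the lazy %-formatting and logging.exception shown here are standard-library refinements, not part of this commit:

import logging

# One-time root-logger setup, as the commit adds near the top of app.py:
# INFO level, timestamped lines, appended to a local app.log file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='a',
)

def run_query(db, query: str):
    # Clean the query by removing markdown fences and trimming whitespace.
    clean_query = query.replace("```sql", "").replace("```", "").strip()
    # Lazy %-formatting defers string building until the record is emitted.
    # Unlike print, extra positional args are format args, so print-style
    # calls such as logging.info("msg:", value) will not render the value.
    logging.info("Executing SQL Query: %s", clean_query)
    try:
        return db.run(clean_query)
    except Exception:
        # logging.exception logs at ERROR level and appends the traceback.
        logging.exception("Error executing query")
        return None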
 
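The speech-input change follows the same idea: the except clauses now bind the exception and log it before substituting a user-facing message. A self-contained sketch, assuming the speech_recognition package (sr) that app.py uses; transcribe and audio_path are illustrative names, not from the file:

import logging
import speech_recognition as sr

def transcribe(audio_path: str) -> str:
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_content = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_content)
    except sr.UnknownValueError as e:
        # Speech was unintelligible; log the detail, return a friendly message.
        logging.info(f"Issue with understanding audio {e}")
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        # The recognition service was unreachable or returned an error.
        logging.info(f"Could not request results from Google Speech Recognition service. {e}")
        return "Could not request results from Google Speech Recognition service."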