Redmind committed · Commit 4213a59 · verified · 1 Parent(s): c5fe17e

Update app.py

Files changed (1):
  1. app.py +51 -63
app.py CHANGED
@@ -44,7 +44,7 @@ from datetime import datetime
 from sqlalchemy import create_engine
 from sqlalchemy.sql import text
 import openai
-
+import logging
 # pandas
 import pandas as pd
 from pandasai.llm.openai import OpenAI
@@ -76,6 +76,9 @@ from huggingface_hub import HfApi, HfFolder
 import os
 
 import zipfile
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename='app.log', filemode='a')
+
 # Get the current working directory
 current_folder = os.getcwd()
 #Variables Initialization
@@ -186,12 +189,12 @@ def generate_sql_query(question):
 def run_query(query):
     # Clean the query by removing markdown symbols and trimming whitespace
     clean_query = query.replace("```sql", "").replace("```", "").strip()
-    print(f"Executing SQL Query: {clean_query}")
+    logging.info(f"Executing SQL Query: {clean_query}")
     try:
         result = db.run(clean_query)
         return result
     except Exception as e:
-        print(f"Error executing query: {e}")
+        logging.info(f"Error executing query: {e}")
         return None
 
 
@@ -205,12 +208,11 @@ def database_tool(question):
 
 def get_ASN_data(question):
     base_url = os.getenv("ASN_API_URL")
-    print(f"base_url{base_url}")
+
     complete_url = f"{base_url}branchMaster.id=343&transactionUid={question}&userId=164&transactionType=ASN"
     try:
         response = requests.get(complete_url)
-        print(f"complete_url{complete_url}")
-        print(f"response{response}")
+
         data = response.json()
         response.raise_for_status()
 
@@ -245,10 +247,10 @@ def get_ASN_data(question):
             return "ASN Details are not found. Please contact system administrator."
 
     except requests.exceptions.HTTPError as http_err:
-        print(f"HTTP error occurred: {http_err}")
+        logging.info(f"HTTP error occurred: {http_err}")
         return "Sorry, we encountered an error while processing your request. Please try after some time."
     except Exception as err:
-        print(f"An error occurred: {err}")
+        logging.info(f"An error occurred: {err}")
         return "Sorry, we encountered an error while processing your request. Please try after some time."
 
 def load_and_split_pdf(pdf_path):
@@ -271,7 +273,7 @@ def query_vector_store(vector_store, query, config=None):
     if config:
         print("Config passed:", config)
     docs = vector_store.similarity_search(query, k=5)
-    print(f"Vector store return: {docs}")
+    logging.info(f"Vector store return: {docs}")
     return docs
 
 
@@ -296,12 +298,12 @@ texts = load_and_split_pdf(pdf_path)
 vector_store = create_vector_store(texts)
 
 def document_data_tool_runtime(question):
-    print(f"Document data runtime tool enter: {question} with {vector_store1}")
+    logging.info(f"Document data runtime tool enter: {question} with {vector_store1}")
     query_response = query_vector_store(vector_store1, question, config={"callbacks": [langfuse_handler]})
     return query_response
 
 def document_data_tool(question):
-    print(f"Document data tool enter: {question}")
+    logging.info(f"Document data tool enter: {question}")
     # query_string = question['tags'][0] if 'tags' in question and question['tags'] else ""
     query_response = query_vector_store(vector_store, question, config={"callbacks": [langfuse_handler]})
     # summarized_response = summarize_document(query_response)
@@ -350,9 +352,9 @@ def send_email_with_attachment_mailjet(recipient_email, subject, body, attach_im
 
     # Check if the email was sent successfully
     if result.status_code == 200:
-        print("Email sent successfully with attachment!")
+        logging.info("Email sent successfully with attachment!")
     else:
-        print(f"Failed to send email. Status code: {result.status_code}")
+        logging.info(f"Failed to send email. Status code: {result.status_code}")
         print(result.json())
 
 
@@ -393,7 +395,7 @@ def send_email_with_attachment(recipient_email, subject, body, attachment_path):
         server.quit()
 
     except Exception as error:
-        print(f"An error occurred: {error}")
+        logging.info(f"An error occurred: {error}")
 
     # return 1
 
@@ -402,16 +404,14 @@ def make_api_request(url, params):
     """Generic function to make API GET requests and return JSON data."""
     try:
         response = requests.get(url, params=params)
-        print("url:",url)
-        print("params:",params)
-        print("response:",response)
-        print(response.status_code)
+
+
         response.raise_for_status()  # Raises an HTTPError if the response was an error
         return response.json()  # Return the parsed JSON data
     except requests.exceptions.HTTPError as http_err:
-        print(f"HTTP error occurred: {http_err}")
+        logging.info(f"HTTP error occurred: {http_err}")
     except Exception as err:
-        print(f"An error occurred: {err}")
+        logging.info(f"An error occurred: {err}")
 
 
 def inventory_report(question):
@@ -421,19 +421,19 @@ def inventory_report(question):
         warehouse_name = parts[0].strip()
         user_question = parts[1].strip()
         user_email = parts[2].strip() if len(parts) > 2 else None
-        print(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}")
+        logging.info(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}")
     else:
        return "warehouse name not found"
-    print(f"warehouse name: {warehouse_name}")
+
    apis[0]["params"]["query"] = warehouse_name
-    print(apis[0]["params"])
+
    data = make_api_request(apis[0]["url"], apis[0]["params"])
-    print("Warehouse data:",data)
+
    if data:

        # Extracting the id for the warehouse with the name "WH"
        warehouse_id = next((item['id'] for item in data['result'] if item['name'] == warehouse_name), None)
-        print(f"warehouse_id:{warehouse_id}")
+
        if warehouse_id:
            print("before api id")
            # Step 3: Update the placeholder with the actual warehouse_id
@@ -442,10 +442,9 @@ def inventory_report(question):
                 if "warehouseId" in api["params"]:
                     api["params"]["warehouseId"] = warehouse_id
 
-            print("after api id")
-            print(apis)
+
             data1 = make_api_request(apis[1]["url"], apis[1]["params"])
-            print(data1)
+
             if data1:
                 headers = ["S.No", "Warehouse Code", "Warehouse Name", "Customer Code", "Customer Name", "Item Code", "Item Name",
                            "Currency", "EAN", "UOM", "Quantity", "Gross Weight", "Volume", "Total Value"]
@@ -475,13 +474,13 @@ def inventory_report(question):
                         ]
                         table_data.append(row)
                     except KeyError as e:
-                        print(f"Missing Key Error for item: {item} - {e}")
+                        logging.info(f"Missing Key Error for item: {item} - {e}")
                 else:
-                    print("Content is either missing or not a list:", result.get('content'))
+                    logging.info("Content is either missing or not a list:", result.get('content'))
             else:
-                print("Result key is missing or not a dictionary in data1:", data1.get('result'))
+                logging.info("Result key is missing or not a dictionary in data1:", data1.get('result'))
 
-            print("No data available in 'content'.")
+            logging.info("No data available in 'content'.")
             return "There are no inventory details for the warehouse you have given."
 
     # Convert to pandas DataFrame
@@ -584,7 +583,7 @@ def clean_gradio_tmp_dir():
     try:
         shutil.rmtree(tmp_dir)
     except Exception as e:
-        print(f"Error cleaning up /tmp/gradio/ directory: {e}")
+        logging.info(f"Error cleaning up /tmp/gradio/ directory: {e}")
 
 
 # Define the interface function
@@ -786,7 +785,7 @@ def answer_question_thread(user_question, chatbot,audio=None):
     # return response_text
 
 
-# without forceful stop option
+# without forceful stop option. not using this function block since we need stop button
 def answer_question(user_question, chatbot, audio=None):
 
     global iterations
@@ -813,10 +812,12 @@ def answer_question(user_question, chatbot, audio=None):
             audio_content = recognizer.record(source)
         try:
             user_question = recognizer.recognize_google(audio_content)
-        except sr.UnknownValueError:
+        except sr.UnknownValueError as e:
             user_question = "Sorry, I could not understand the audio."
+            logging.info((f"Issue with understanding audio {e}"))
-        except sr.RequestError:
+        except sr.RequestError as e:
             user_question = "Could not request results from Google Speech Recognition service."
+            logging.info((f"Could not request results from Google Speech Recognition service. {e}"))
 
     while iterations < max_iterations:
 
@@ -863,7 +864,7 @@ def answer_question(user_question, chatbot, audio=None):
             # attachment_path=chart_path
             attachment_path=img_str)
 
-        # Send email with the chart image attached
+        # Send email with the chart image attached. SMTP is not working in HF spaces
         """send_email_with_attachment(
             recipient_email=user_email,
             subject="Warehouse Inventory Report",
@@ -876,9 +877,9 @@ def answer_question(user_question, chatbot, audio=None):
         try:
             os.remove(img)  # Clean up the temporary image file
         except Exception as e:
-            print(f"Error cleaning up image file: {e}")
+            logging.info(f"Error cleaning up image file: {e}")
     except Exception as e:
-        print(f"Error loading image file: {e}")
+        logging.info(f"Error loading image file: {e}")
         chatbot.append((user_question, "Chart generation failed. Please try again."))
         return gr.update(value=chatbot)
 
@@ -913,7 +914,7 @@ def save_feedback(username, user_question, user_response, feedback):
         cursor.execute(query, (username, user_question, user_response, feedback))
         conn.commit()
     except mysql.connector.Error as err:
-        print(f"Error: {err}")
+        logging.info(f"Error: {err}")
     finally:
         if cursor:
             cursor.close()
@@ -1045,15 +1046,11 @@ def generate_prompt_from_excel_file(df_dict):
         # Add sheet details to the prompt
         prompt += f"For the sheet '{sheet_name}', the column headers are:"
         prompt += f"{', '.join(column_headers)}\n\n"
-        #prompt += f"Example data from sheet '{sheet_name}':\n"
-        #prompt += f"{sample_data}\n\n"
+
 
         # Generate example natural language questions based on columns
         example_questions = generate_example_questions(sheet_name, column_headers)
-        #prompt += "### Example Questions:\n"
-        #for question in example_questions:
-        #    prompt += f"- {question}\n"
-        #prompt += "\n"
+
 
     # Finalize the prompt with function call description
     prompt += f"- Query: A natural language question (e.g., List the distinct broker names). The question should be sent as 'What are the distinct broker names in the excel document.'."
@@ -1249,7 +1246,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
     # List of required columns
     required_columns = ['BROKER', 'ACCOUNT NUMBER', 'EMPLOYEE NAME', 'ACCOUNT NAME', 'ACCOUNT ID']
     # Filter the DataFrame to include only the required columns
-    #print(df[required_columns])
+
     #limited_data = df[required_columns]
     limited_data11 = df.head(3)
     limited_data = limited_data11[required_columns]
@@ -1266,25 +1263,19 @@ def handle_large_dataset(df, create_document,isDataFrame):
 
     limited_data_without_first_column.insert(0, 'SNo', range(1, len(limited_data) + 1))
     # 3. Save the full dataset to a downloadable file
-
-
-
-
+
     file_path = "output_data.xlsx"
     #file_path = os.path.join(current_folder, 'output_data.xlsx')
     #Broker Name, Account Number, Employee name,Account Owner,Account ID
     df.to_excel(file_path, index=False)
-
-
 
-
     global user_name
     # Get today's date and current time
     now = datetime.now()
 
     # Format the date and time as 'YYYY-MM-DD HH:MM'
     formatted_date_time = now.strftime("%Y-%m-%d %H:%M")
-    print(formatted_date_time)
+
     directory = user_name + "/" + formatted_date_time
     create_file_HF(file_path, directory,False)
     dataset_link = get_download_link(directory,file_path)
@@ -1296,7 +1287,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
 
     #columns = list(df.columns)
     sample_table = limited_data_without_first_column.to_markdown()
-    #print(sample_table)
+
     if create_document:
         #Logic to generate pdfs with employee name and account number
         for index, row in df.iterrows():
@@ -1313,8 +1304,7 @@ def handle_large_dataset(df, create_document,isDataFrame):
         link = get_download_link(directory,zip_file_name)
         print(f"downloadable link: {link}")
         docstatus = f"""Please download <a href="{dataset_link}" download>excel</a> and <a href="{link}" download>PDFs</a>."""
-        print(docstatus)
-        print(sample_table)
+
     # 5. Return the summary and downloadable link
     #return f"""
     #There are a total of {total_rows} rows. Please download the complete dataset here: <a href="https://redmindtechnologies.com/RedMindGPT/output.xlsx" download>Download</a>. Here are the first 3 rows:
@@ -1337,8 +1327,6 @@ def create_file_HF(file_path,directory,document_created = False):
         directory = directory + "/" + file_path
     else:
         directory = directory + "/" + file_path
-
-
 
     api.upload_file(path_or_fileobj=file_path, repo_id=repo_id,revision = branch, repo_type= "space", path_in_repo=directory)
 
@@ -1376,7 +1364,7 @@ def create_pdf(cname,ename,account_number, directory):
 
     # Get the absolute path
     file_path = os.path.abspath(output_file_name)
-    print(f"The file was created at: {file_path}")
+    logging.info(f"The file was created at: {file_path}")
     #create_file_HF(output_file_name, directory,document_created)
 
 
@@ -1393,7 +1381,7 @@ def zip_files_in_folder(directory_output, output_zip):
             if file.endswith(".pdf") and os.path.isfile(file_path):
                 # Add file to the ZIP archive
                 zipf.write(file_path, file)  # Save with its filename in the ZIP
-    print(f"ZIP file created: {output_zip}, {output_zip}, {directory_output}")
+    logging.info(f"ZIP file created: {output_zip}, {output_zip}, {directory_output}")
 
 def directory_exists(repo_id, directory, token):
     try:
@@ -1406,7 +1394,7 @@ def directory_exists(repo_id, directory, token):
         # Check if any file starts with the directory path
         return any(file.startswith(directory) for file in files)
     except Exception as e:
-        print(f"Error checking directory existence: {e}")
+        logging.info(f"Error checking directory existence: {e}")
         return False
 
 def get_download_link(file_path,file_name):
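
For reference, the pattern this commit applies throughout app.py is a print-to-logging migration: configure the root logger once at import time, then route diagnostics through logging.info so they land in app.log instead of stdout. A minimal standalone sketch of that pattern follows; the db argument is a stand-in for app.py's database handle, and the lazy %-formatting and logging.exception shown here are standard-library refinements, not part of this commit:

import logging

# One-time root-logger setup, as the commit adds near the top of app.py:
# INFO level, timestamped lines, appended to a local app.log file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='a',
)

def run_query(db, query: str):
    # Clean the query by removing markdown fences and trimming whitespace.
    clean_query = query.replace("```sql", "").replace("```", "").strip()
    # Lazy %-formatting defers string building until the record is emitted.
    # Unlike print, extra positional args are format args, so print-style
    # calls such as logging.info("msg:", value) will not render the value.
    logging.info("Executing SQL Query: %s", clean_query)
    try:
        return db.run(clean_query)
    except Exception:
        # logging.exception logs at ERROR level and appends the traceback.
        logging.exception("Error executing query")
        return None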
 
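The speech-input change follows the same idea: the except clauses now bind the exception and log it before substituting a user-facing message. A self-contained sketch, assuming the speech_recognition package (sr) that app.py uses; transcribe and audio_path are illustrative names, not from the file:

import logging
import speech_recognition as sr

def transcribe(audio_path: str) -> str:
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio_content = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_content)
    except sr.UnknownValueError as e:
        # Speech was unintelligible; log the detail, return a friendly message.
        logging.info(f"Issue with understanding audio {e}")
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        # The recognition service was unreachable or returned an error.
        logging.info(f"Could not request results from Google Speech Recognition service. {e}")
        return "Could not request results from Google Speech Recognition service."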