from PIL import Image import base64 from io import BytesIO import os import re import tempfile import wave import requests import gradio as gr import time import shutil import json import nltk import mysql.connector import fnmatch # audio related code is not included based on Arun's input # audio package import speech_recognition as sr from pydub import AudioSegment from pydub.playback import play # SMTP code is not included since HFSpaces doesn't support it # email library import smtplib, ssl from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.mime.base import MIMEBase from email import encoders # langchain from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnableSequence, RunnableLambda from langchain_openai import ChatOpenAI from langchain_openai import OpenAIEmbeddings from langchain_community.vectorstores import FAISS from langchain_community.utilities import SQLDatabase from langchain.agents import create_tool_calling_agent, AgentExecutor, Tool from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.tools import StructuredTool #from langchain.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field from PyPDF2 import PdfReader from nltk.tokenize import sent_tokenize from datetime import datetime from sqlalchemy import create_engine from sqlalchemy.sql import text import openai # pandas import pandas as pd from pandasai.llm.openai import OpenAI from pandasai import SmartDataframe from dotenv import load_dotenv # Load environment variables load_dotenv() # langfuse analytics from langfuse.callback import CallbackHandler # Inventory API data table from tabulate import tabulate #forcefully stop the agent execution import concurrent.futures import threading # mailjet_rest to send email from mailjet_rest import Client import base64 #for PDF form filling from PyPDFForm import FormWrapper #Variables Initialization agent_executor = None vector_store1 = None texts1 = None excel_dataframe = None file_extension = None total_rows = "" docstatus = "" sample_table = "" #This is to define the summary of the runtime tool. This summary will be updated in prompt template and description of the new tool run_time_tool_summary="" # Define global variables for managing the thread and current_event executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) current_event = None stop_event = threading.Event() # LangFuse API keys and host settings os.environ["LANGFUSE_PUBLIC_KEY"] = os.getenv("LANGFUSE_PUBLIC_KEY") os.environ["LANGFUSE_SECRET_KEY"] = os.getenv("LANGFUSE_SECRET_KEY") os.environ["LANGFUSE_HOST"] = os.getenv("LANGFUSE_HOST") DB_USER = 'u852023448_redmindgpt' DB_PASSWORD = 'redmindGpt@123' DB_HOST = '217.21.88.10' DB_NAME = 'u852023448_redmindgpt' langfuse_handler = CallbackHandler() langfuse_handler.auth_check() # Optional: Checks if the authentication is successful nltk.download('punkt') open_api_key_token = os.getenv("OPEN_AI_API") os.environ['OPENAI_API_KEY'] = open_api_key_token pdf_path = "Inbound.pdf" db_uri = os.getenv("POSTGRESQL_CONNECTION") # Database setup db = SQLDatabase.from_uri(db_uri) user_email = "" warehouse_name = "" warehouse_id = "" # Today's date to be populated in inventory API inventory_date = datetime.today().strftime('%Y-%m-%d') apis = [ # fetch warehouse ID { "url": "http://193.203.162.39:8383/nxt-wms/userWarehouse/fetchWarehouseForUserId?", "params": {"query": warehouse_name, "userId": 164} }, # Stock summary based on warehouse id { "url": "http://193.203.162.39:8383/nxt-wms/transactionHistory/stockSummary?", "params": {"branchId": 343, "onDate": inventory_date, "warehouseId": warehouse_id} } ] # LLM setup llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.1) llm_chart = OpenAI(is_safe=False) def get_schema(_): schema_info = db.get_table_info() # This should be a string of your SQL schema return schema_info def generate_sql_query(question): schema = get_schema(None) template_query_generation = """ Schema: {schema} Question: {question} Provide a SQL query to answer the above question using the exact field names and table names specified in the schema. SQL Query (Please provide only the SQL statement without explanations or formatting): """ prompt_query_generation = ChatPromptTemplate.from_template(template_query_generation) schema_and_question = RunnableLambda(lambda _: {'schema': schema, 'question': question}) sql_chain = RunnableSequence( schema_and_question, prompt_query_generation, llm.bind(stop=["SQL Query End"]), # Adjust the stop sequence to your need StrOutputParser() ) sql_query = sql_chain.invoke({}) sql_query = sql_chain.invoke({}, config={"callbacks": [langfuse_handler]}) return sql_query.strip() def run_query(query): # Clean the query by removing markdown symbols and trimming whitespace clean_query = query.replace("```sql", "").replace("```", "").strip() print(f"Executing SQL Query: {clean_query}") try: result = db.run(clean_query) return result except Exception as e: print(f"Error executing query: {e}") return None # Define the database query tool # The function that uses the above models # Define the function that will handle the database query def database_tool(question): sql_query = generate_sql_query(question) return run_query(sql_query) def get_ASN_data(question): base_url = os.getenv("ASN_API_URL") print(f"base_url{base_url}") complete_url = f"{base_url}branchMaster.id=343&transactionUid={question}&userId=164&transactionType=ASN" try: response = requests.get(complete_url) print(f"complete_url{complete_url}") print(f"response{response}") data = response.json() response.raise_for_status() if 'result' in data and 'content' in data['result'] and data['result']['content']: content = data['result']['content'][0] trnHeaderAsn = content['trnHeaderAsn'] party = content['party'][0] transactionUid = trnHeaderAsn['transactionUid'] customerOrderNo = trnHeaderAsn.get('customerOrderNo', 'N/A') orderDate = trnHeaderAsn.get('orderDate', 'N/A') customerInvoiceNo = trnHeaderAsn.get('customerInvoiceNo', 'N/A') invoiceDate = trnHeaderAsn.get('invoiceDate', 'N/A') expectedReceivingDate = trnHeaderAsn['expectedReceivingDate'] transactionStatus = trnHeaderAsn['transactionStatus'] shipper_code = party['shipper']['code'] if party['shipper'] else 'N/A' shipper_name = party['shipper']['name'] if party['shipper'] else 'N/A' data = [ ["Transaction UID", transactionUid], ["Customer Order No", customerOrderNo], ["Order Date", orderDate], ["Customer Invoice No", customerInvoiceNo], ["Invoice Date", invoiceDate], ["Expected Receiving Date", expectedReceivingDate], ["Transaction Status", transactionStatus], ["Shipper Code", shipper_code], ["Shipper Name", shipper_name] ] return f"The ASN details of {question} is {data}." else: return "ASN Details are not found. Please contact system administrator." except requests.exceptions.HTTPError as http_err: print(f"HTTP error occurred: {http_err}") except Exception as err: print(f"An error occurred: {err}") def load_and_split_pdf(pdf_path): reader = PdfReader(pdf_path) text = '' for page in reader.pages: text += page.extract_text() text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50) texts = text_splitter.split_text(text) return texts def create_vector_store(texts): embeddings = OpenAIEmbeddings() vector_store = FAISS.from_texts(texts, embeddings) return vector_store def query_vector_store(vector_store, query, config=None): if config: print("Config passed:", config) docs = vector_store.similarity_search(query, k=5) print(f"Vector store return: {docs}") return docs def summarize_document(docs): summarized_docs = [] for doc in docs: if isinstance(doc, list): doc_content = ' '.join([d.page_content for d in doc]) else: doc_content = doc.page_content sentences = sent_tokenize(doc_content) if len(sentences) > 5: summarized_content = ' '.join(sentences[:5]) else: summarized_content = doc_content summarized_docs.append(summarized_content) return '\n\n'.join(summarized_docs) texts = load_and_split_pdf(pdf_path) vector_store = create_vector_store(texts) def document_data_tool_runtime(question): print(f"Document data runtime tool enter: {question} with {vector_store1}") query_response = query_vector_store(vector_store1, question, config={"callbacks": [langfuse_handler]}) return query_response def document_data_tool(question): print(f"Document data tool enter: {question}") # query_string = question['tags'][0] if 'tags' in question and question['tags'] else "" query_response = query_vector_store(vector_store, question, config={"callbacks": [langfuse_handler]}) # summarized_response = summarize_document(query_response) return query_response # mailjet API since SMTP is not supported HF spaces def send_email_with_attachment_mailjet(recipient_email, subject, body, attach_img_base64=None): api_key = os.getenv("MAILJET_API_KEY") api_secret = os.getenv("MAILJET_API_SECRET") # Initialize the Mailjet client mailjet = Client(auth=(api_key, api_secret), version='v3.1') # Define the email details with an attachment data = { 'Messages': [ { "From": { "Email": "lakshmi.vairamani@redmindtechnologies.com", "Name": "Redmind Technologies" }, "To": [ { "Email": recipient_email, "Name": "" } ], "Subject": subject, "TextPart": body, "CustomID": "AppGettingStartedTest", "Attachments": [ { "ContentType": "image/png", # Replace with the correct MIME type of your image "Filename": "inventory_report.png", # Name of the image as it will appear in the email "Base64Content": attach_img_base64 # Base64-encoded image content } ] } ] } # Send the email result = mailjet.send.create(data=data) # Check if the email was sent successfully if result.status_code == 200: print("Email sent successfully with attachment!") else: print(f"Failed to send email. Status code: {result.status_code}") print(result.json()) #smtp lib def send_email_with_attachment(recipient_email, subject, body, attachment_path): try: sender_email = os.getenv("EMAIL_SENDER") sender_password = os.getenv("EMAIL_PASSWORD") # Create a multipart message msg = MIMEMultipart() msg['From'] = sender_email msg['To'] = recipient_email msg['Subject'] = subject # Attach the body with the msg instance msg.attach(MIMEText(body, 'plain')) # Open the file to be sent attachment = open(attachment_path, "rb") # print("Attached the image") # Instance of MIMEBase and named as p part = MIMEBase('application', 'octet-stream') # To change the payload into encoded form part.set_payload((attachment).read()) # Encode into base64 encoders.encode_base64(part) part.add_header('Content-Disposition', f"attachment; filename= {attachment_path}") # Attach the instance 'part' to instance 'msg' msg.attach(part) server = smtplib.SMTP('smtp.gmail.com', 587) server.starttls() server.login(sender_email, sender_password) text = msg.as_string() server.sendmail(sender_email, recipient_email, text) server.quit() except Exception as error: print(f"An error occurred: {error}") # return 1 def make_api_request(url, params): """Generic function to make API GET requests and return JSON data.""" try: response = requests.get(url, params=params) response.raise_for_status() # Raises an HTTPError if the response was an error return response.json() # Return the parsed JSON data except requests.exceptions.HTTPError as http_err: print(f"HTTP error occurred: {http_err}") except Exception as err: print(f"An error occurred: {err}") def inventory_report(question): # Split the question to extract warehouse name, user question, and optional email if question.count(":") > 0: parts = question.split(":", 2) warehouse_name= parts[0].strip() user_question = parts[1].strip() user_email = parts[2].strip() if len(parts) > 2 else None print(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}") else: return "warehouse name not found" data = make_api_request(apis[0]["url"], apis[0]["params"]) print(data) if data: # Extracting the id for the warehouse with the name "WH" warehouse_id = next((item['id'] for item in data['result'] if item['wareHouseId'] == warehouse_name), None) if (warehouse_id): # Step 3: Update the placeholder with the actual warehouse_id for api in apis: if "warehouseId" in api["params"]: api["params"]["warehouseId"] = warehouse_id data1 = make_api_request(apis[1]["url"], apis[1]["params"]) if (data1): headers = ["S.No", "Warehouse Code", "Warehouse Name", "Customer Code", "Customer Name", "Item Code", "Item Name", "Currency", "EAN", "UOM", "Quantity", "Gross Weight", "Volume", "Total Value"] table_data = [] for index, item in enumerate(data1['result'], start=1): row = [ index, # Serial number item['warehouse']['code'], item['warehouse']['name'], item['customer']['code'], item['customer']['name'], item['skuMaster']['code'], item['skuMaster']['name'], item['currency']['code'], item['eanUpc'], item['uom']['code'], item['totalQty'], item['grossWeight'], item['volume'], item['totalValue'] ] table_data.append(row) # Convert to pandas DataFrame df = pd.DataFrame(table_data, columns=headers) chart_link = chat_with_llm(df,question) return chart_link else: return "There are no inventory details for the warehouse you have given." else: return "Please provide a warehouse name available in the database." def chat_with_llm(df,question): sdf = SmartDataframe(df, config={"llm": llm_chart}) llm_response = sdf.chat(question) return llm_response def bind_llm(llm, tools,prompt_template): llm = llm.bind() agent = create_tool_calling_agent(llm, tools, ChatPromptTemplate.from_template(prompt_template)) agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True) return agent_executor # Define input and output models using Pydantic class QueryInput(BaseModel): question: str = Field( description="The question to be answered by appropriate tool. Please follow the instructions. For API tool, do not send the question as it is. Please send the ASN id.")# Invoke datavisulaization tool by processing the user question and send two inputs to the tool. One input will be the warehouse name and another input to the tool will be the entire user_question itself. Please join those two strings and send them as a single input string with ':' as delimiter") # config: dict = Field(default={}, description="Optional configuration for the database query.") # Define the output model for database queries class QueryOutput(BaseModel): result: str = Field(..., description="Display the answer based on the prompts given in each tool. For dataVisualization tool, it sends a image file as output. Please give the image file path only to the gr.Image. For DocumentData tool, Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points.") # Wrap the function with StructuredTool for better parameter handling tools = [ StructuredTool( func=get_ASN_data, name="APIData", args_schema=QueryInput, output_schema=QueryOutput, description="Tool to get details of ASN api. ASN id will be in the input with the format of first three letters as ASN and it is followed by 11 digit numeral. Pass only the id as input. Do not send the complete user question to the tool. If there are any other queries related to ASN without ASN id, please use the document tool." ), StructuredTool( func=document_data_tool, name="DocumentData", args_schema=QueryInput, output_schema=QueryOutput, description="You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. " ), StructuredTool( func=database_tool, name="DatabaseQuery", args_schema=QueryInput, output_schema=QueryOutput, description="Tool to query the database based on structured input." ), StructuredTool( func=inventory_report, name="dataVisualization", args_schema=QueryInput, output_schema=QueryOutput, description=""" Tool to generate a visual chart output for a particular warehouse based on the provided question. This tool processes the user question to identify the warehouse name and the specific request. If the user specifies an email, include the email in the input. The input format should be: 'warehouse name: user question: email (if any)'. The tool generates the requested chart and sends it to the provided email if specified. Examples: 1. Question without email, without warehouse: "Analyze item name and quantity in a bar chart in warehouse" Input to tool: "I want to analyze item name and quantity in a bar chart" 2. Question with email: "Analyze item name and quantity in a bar chart in warehouse Allcargo Logistics and send email to example@example.com" Input to tool: "Allcargo Logistics: I want to analyze item name and quantity in a bar chart: example@example.com" """ ) ] prompt_template = f"""You are an assistant that helps with database queries, API information, and document retrieval. Your job is to provide clear, complete, and detailed responses to the following queries. Please give the output response in an user friendly way and remove "**" from the response. For example, document related queries can be answered in a clear and concise way with numbering and not as a paragraph. Database related queries should be answered with proper indentation and use numbering for the rows. ASN id related queries should be answered with proper indentation and use numbering for the rows. For ASN id related questions, if the user specifies an ASN id, provide the information from the api tool. Pass only the id as input to the tool. Do not pass the entire question as input to the tool. If the details are not found, say it in a clear and concise way. You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. When answering, focus on providing actionable insights and clear explanations related to the specific query. Please remove "**" from the response. For SQL database-related questions, only use the fields available in the warehouse schema, including tables such as customer_master, efs_company_master, efs_group_company_master, efs_region_master, party_address_detail, wms_warehouse_master. For datavisualization, user will ask for inventory report of a particular warehouse. Your job is to return the image path to chat interface and display the image as output. {{agent_scratchpad}} Here is the information you need to process: Question: {{input}}""" agent_executor = bind_llm(llm,tools,prompt_template) def ensure_temp_chart_dir(): temp_chart_dir = os.getenv("IMAGE_MAIN_URL") if not os.path.exists(temp_chart_dir): os.makedirs(temp_chart_dir) def clean_gradio_tmp_dir(): tmp_dir = os.getenv("IMAGE_GRADIO_PATH") if os.path.exists(tmp_dir): try: shutil.rmtree(tmp_dir) except Exception as e: print(f"Error cleaning up /tmp/gradio/ directory: {e}") # Define the interface function max_iterations = 5 iterations = 0 def handle_query(user_question, chatbot, audio=None): """ Function to handle the processing of user input with `AgentExecutor.invoke()`. """ global current_event, stop_event # Clear previous stop event and current_event stop_event.clear() if current_event and not current_event.done(): chatbot.append(("","A query is already being processed. Please stop it before starting a new one.")) return gr.update(value=chatbot) # Start the processing in a new thread current_event = executor.submit(answer_question_thread, user_question, chatbot) # Periodically check if current_event is done while not current_event.done(): if stop_event.is_set(): #current_event.task.cancel() # Attempt to cancel the current_event current_event.set_result((user_question, "Sorry, we encountered an error while processing your request. Please try after some time.")) current_event.cancel() # Attempt to cancel the current_event executor.shutdown(wait=False) # Shutdown the executor print("Current event cancelled") print(current_event.cancelled()) chatbot.append((user_question, "Sorry, we encountered an error while processing your request. Please try after some time.")) return gr.update(value=chatbot) time.sleep(1) # Wait for 1 second before checking again if current_event.cancelled(): chatbot.append((user_question, "Sorry, we encountered an error while processing your request. Please try after some time.")) return gr.update(value=chatbot) else: try: user_question1, response_text1 = current_event.result() # Get the result of the completed current_event print("output") print(user_question1) print(response_text1) chatbot.append((user_question1, response_text1)) return gr.update(value=chatbot) except Exception as e: print(f"Error occurred: {e}") chatbot.append((user_question, "Sorry, we encountered an error while processing your request. Please try after some time.")) return gr.update(value=chatbot) def stop_processing(chatbot): """ Stops the current processing if it's running. """ global current_event, stop_event if current_event and not current_event.done(): stop_event.set() # Signal the process to stop current_event.cancel() # Attempt to cancel the current_event chatbot.append(("Sorry, we encountered an error while processing your request. Please try after some time.","")) return gr.update(value=chatbot) # This function is for agent executor invoke with the option of stop def answer_question_thread(user_question, chatbot,audio=None): global iterations iterations = 0 # Ensure the temporary chart directory exists # ensure_temp_chart_dir() # Clean the /tmp/gradio/ directory # clean_gradio_tmp_dir() # Handle audio input if provided """ if audio is not None: sample_rate, audio_data = audio audio_segment = AudioSegment( audio_data.tobytes(), frame_rate=sample_rate, sample_width=audio_data.dtype.itemsize, channels=1 ) with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file: audio_segment.export(temp_audio_file.name, format="wav") temp_audio_file_path = temp_audio_file.name recognizer = sr.Recognizer() with sr.AudioFile(temp_audio_file_path) as source: audio_content = recognizer.record(source) try: user_question = recognizer.recognize_google(audio_content) except sr.UnknownValueError: user_question = "Sorry, I could not understand the audio." except sr.RequestError: user_question = "Could not request results from Google Speech Recognition service." """ while iterations < max_iterations: response = agent_executor.invoke({"input": user_question}, config={"callbacks": [langfuse_handler]}, early_stopping_method="generate") if isinstance(response, dict): response_text = response.get("output", "") else: response_text = response if "invalid" not in response_text.lower(): break iterations += 1 if iterations == max_iterations: return user_question , "Sorry, I couldn't complete your request" #"The agent could not generate a valid response within the iteration limit." if os.getenv("IMAGE_PATH") in response_text: # Open the image file img = Image.open(os.getenv("IMAGE_PATH")) # Convert the PIL Image to a base64 encoded string buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") img = f'' response_text = response.get("output", "").split(".")[0] + img email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' match = re.search(email_pattern, user_question) if match: user_email = match.group() # Return the matched email # email send if len(user_email) > 0: # Send email with the chart image attached send_email_with_attachment_mailjet( recipient_email=user_email, subject="Warehouse Inventory Report", body=response.get("output", "").split(".")[0] + ". This is an auto-generated email containing a chart created using Generative AI.", # attachment_path=chart_path attach_img_base64=img_str) if "send email to" in user_question: try: os.remove(img) # Clean up the temporary image file except Exception as e: print(f"Error cleaning up image file: {e}") except Exception as e: print(f"Error loading image file: {e}") response_text = "Chart generation failed. Please try again." return user_question, response_text else: return user_question, response_text # response_text = response_text.replace('\n', ' ').replace(' ', ' ').strip() # return response_text # without forceful stop option def answer_question(user_question, chatbot, audio=None): global iterations iterations = 0 # Ensure the temporary chart directory exists # ensure_temp_chart_dir() # Clean the /tmp/gradio/ directory # clean_gradio_tmp_dir() # Handle audio input if provided if audio is not None: sample_rate, audio_data = audio audio_segment = AudioSegment( audio_data.tobytes(), frame_rate=sample_rate, sample_width=audio_data.dtype.itemsize, channels=1 ) with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file: audio_segment.export(temp_audio_file.name, format="wav") temp_audio_file_path = temp_audio_file.name recognizer = sr.Recognizer() with sr.AudioFile(temp_audio_file_path) as source: audio_content = recognizer.record(source) try: user_question = recognizer.recognize_google(audio_content) except sr.UnknownValueError: user_question = "Sorry, I could not understand the audio." except sr.RequestError: user_question = "Could not request results from Google Speech Recognition service." while iterations < max_iterations: response = agent_executor.invoke({"input": user_question}, config={"callbacks": [langfuse_handler]}) if isinstance(response, dict): response_text = response.get("output", "") else: response_text = response if "invalid" not in response_text.lower(): break iterations += 1 if iterations == max_iterations: return "The agent could not generate a valid response within the iteration limit." if os.getenv("IMAGE_PATH") in response_text: # Open the image file img = Image.open(os.getenv("IMAGE_PATH")) # Convert the PIL Image to a base64 encoded string buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") img = f'' chatbot.append((user_question, img)) email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' match = re.search(email_pattern, user_question) if match: user_email = match.group() # Return the matched email # email send if len(user_email) > 0: # Send email with the chart image attached send_email_with_attachment_mailjet( recipient_email=user_email, subject="Warehouse Inventory Report", body=response.get("output", "").split(".")[0], # attachment_path=chart_path attachment_path=img_str) # Send email with the chart image attached """send_email_with_attachment( recipient_email=user_email, subject="Warehouse Inventory Report", body=response.get("output", "").split(":")[0], # attachment_path=chart_path attachment_path=os.getenv("IMAGE_PATH") )""" if "send email to" in user_question: try: os.remove(img) # Clean up the temporary image file except Exception as e: print(f"Error cleaning up image file: {e}") except Exception as e: print(f"Error loading image file: {e}") chatbot.append((user_question, "Chart generation failed. Please try again.")) return gr.update(value=chatbot) else: chatbot.append((user_question, response_text)) return gr.update(value=chatbot) def submit_feedback(feedback, chatbot, request: gr.Request): gr.Info("Thank you for your feedback.") #save feedback with user question and response in database save_feedback(request.username,chatbot[-1][0], chatbot[-1][1], feedback) feedback_response = "User feedback: " + feedback return chatbot + [(feedback_response, None)], gr.update(visible=False), gr.update(visible=False) # Function to connect to MySQL database def connect_to_db(): return mysql.connector.connect( host=DB_HOST, user=DB_USER, password=DB_PASSWORD, database=DB_NAME ) # Function to save feedback to the database def save_feedback(username, user_question, user_response, feedback): try: conn = connect_to_db() cursor = conn.cursor() query = "INSERT INTO user_feedback (username, question, response, feedback) VALUES (%s, %s, %s, %s)" cursor.execute(query, (username, user_question, user_response, feedback)) conn.commit() except mysql.connector.Error as err: print(f"Error: {err}") finally: if cursor: cursor.close() if conn: conn.close() def handle_dislike(data: gr.LikeData): if not data.liked: print("downvote") gr.Info("Please enter your feedback.") return gr.update(visible=True), gr.update(visible=True) else: print("upvote") return gr.update(visible=False), gr.update(visible=False) # greet with user name on successful login def update_message(request: gr.Request): return f"

Welcome, {request.username}

" # Function to generate a 50-word summary of the newly uploaded doc using OpenAI def generate_summary(text): prompt = ( "You are an AI that helps with document analysis. Please provide a concise title and a summary of the following document. " "The summary should be about 50 words and include key details that can help answer questions accurately:\n\n" f"{text}\n\nTitle : Summary" ) # Call the OpenAI API to generate a summary response = openai.chat.completions.create( messages=[ { "role": "user", "content": prompt, } ], model="gpt-4o-mini", ) # Extract the title and summary from the response response_content = response.choices[0].message.content lines = response_content.split("\n") # Extract title title_line = lines[0] title = title_line.split("**Title:**")[-1].strip() # Extract summary summary_line = lines[2] summary = summary_line.split("**Summary:**")[-1].strip() return title, summary #function to handle file upload decide whether excel or doc is uploaded and respective tool will be created with appropriate prompts at runtime def upload_file(filepath): global vector_store1, file_extension # Get the file extension _, file_extension = os.path.splitext(filepath) if file_extension == ".pdf": texts1 = load_and_split_pdf(filepath) vector_store1 = create_vector_store(texts1) # Generate a 50-word summary from the extracted text title, summary = generate_summary(texts1) return title, summary, file_extension elif file_extension == ".xlsx": title, prompt = process_excel(filepath) return title, prompt def generate_example_questions(sheet_name, column_headers): """ Generates natural language questions based on column headers. Args: sheet_name (str): The name of the Excel sheet. column_headers (list): List of column headers from the sheet. Returns: questions (list): List of generated questions based on the columns. """ questions = [] # Check for typical columns and create questions if 'Product Name' in column_headers or 'Product' in column_headers: questions.append(f"What is the total sales for a specific product in {sheet_name}?") if 'Sales Amount' in column_headers or 'Amount' in column_headers: questions.append(f"What is the total sales amount for a specific region in {sheet_name}?") if 'Region' in column_headers: questions.append(f"Which region had the highest sales in {sheet_name}?") if 'Date' in column_headers: questions.append(f"What were the total sales during a specific month in {sheet_name}?") if 'Price' in column_headers: questions.append(f"What is the price of a specific product in {sheet_name}?") if any(fnmatch.fnmatch(header, 'Employee*') for header in column_headers): questions.append(f"What are the details of the distinct broker names?") return questions def generate_prompt_from_excel_file(df_dict): """ Generates a prompt from an Excel file containing multiple sheets. Args: excel_file_path (str): The path to the Excel file. Returns: prompt (str): A detailed prompt including sheet names, column headers, sample data, and example questions for each sheet. """ # Initialize prompt with basic structure prompt = "You have been provided with an Excel file containing data in several sheets.\n" # Loop through each sheet to extract column headers and sample data for sheet_name, sheet_df in df_dict.items(): # Extract column headers column_headers = list(sheet_df.columns) # Get a sample of the data (first few rows) sample_data = sheet_df.head(3).to_string(index=False) # Add sheet details to the prompt prompt += f"For the sheet '{sheet_name}', the column headers are:" prompt += f"{', '.join(column_headers)}\n\n" #prompt += f"Example data from sheet '{sheet_name}':\n" #prompt += f"{sample_data}\n\n" # Generate example natural language questions based on columns example_questions = generate_example_questions(sheet_name, column_headers) #prompt += "### Example Questions:\n" #for question in example_questions: # prompt += f"- {question}\n" #prompt += "\n" # Finalize the prompt with function call description prompt += f"- Query: A natural language question (e.g., List all the employees with broker name ADP or Alerus). The question should be sent as 'What are the employee details with broker name ADP or Alerus :'." prompt += f"""Output : {docstatus}. Here is the sample table: {sample_table}. """ prompt += f"- Query: A natural language question with request to create LOA document (e.g., can you create LOA document for all the employees with broker name ADP or Alerus). The question should be sent as 'What are the employee details with broker name ADP or Alerus : LOA document'." prompt += f"""Output: {docstatus}. Here is the sample table: {sample_table}. If there is any error, please display the message returned by the function as response. """ return "Excel data", prompt # Function to handle "Add to RedMindGPT" button click def add_to_redmindgpt(title, summary): """ Adds a document or Excel file to the RedmindGPT system and configures the appropriate runtime tool for handling related queries. Parameters: title (str): The title of the document or Excel file. summary (str): A brief summary of the document or Excel file. Returns: str: A message indicating whether the file has been added successfully. Behavior: - If the file extension is ".pdf", it sets up a runtime tool for handling document-related queries. - If the file extension is ".xlsx", it sets up a runtime tool for handling Excel data-related queries. - Configures the prompt template for the agent executor based on the file type. - Adds the configured runtime tool to the list of tools used by the agent executor. """ global agent_executor, file_extension if file_extension == ".pdf": run_time_tool_summary = f"For {title} document related questions, Please refer runtimeDocumentData tool. {summary}. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points." run_time_tool = StructuredTool( func=document_data_tool_runtime, name="runtimeDocumentData", args_schema=QueryInput, output_schema=QueryOutput, description=f"You are an AI assistant trained to help with the questions based on the uploaded document {title}. {summary}. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points." ) # Add the new tool to the beginning tools.insert(0, run_time_tool) prompt_template = f"""You are an assistant that helps with database queries, API information, and document retrieval. Your job is to provide clear, complete, and detailed responses to the following queries. Please give the output response in an user friendly way and remove "**" from the response. For example, document related queries can be answered in a clear and concise way with numbering and not as a paragraph. Database related queries should be answered with proper indentation and use numbering for the rows. ASN id related queries should be answered with proper indentation and use numbering for the rows. {run_time_tool_summary} For ASN id related questions, if the user specifies an ASN id, provide the information from the api tool. Pass only the id as input to the tool. Do not pass the entire question as input to the tool. If the details are not found, say it in a clear and concise way. You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. When answering, focus on providing actionable insights and clear explanations related to the specific query. Please remove "**" from the response. For SQL database-related questions, only use the fields available in the warehouse schema, including tables such as customer_master, efs_company_master, efs_group_company_master, efs_region_master, party_address_detail, wms_warehouse_master. For datavisualization, user will ask for inventory report of a particular warehouse. Your job is to return the image path to chat interface and display the image as output. {{agent_scratchpad}} Here is the information you need to process: Question: {{input}}""" agent_executor = bind_llm(llm,tools,prompt_template) return f"File has been added successfully." elif file_extension == ".xlsx": run_time_excel_tool_summary = f"For {title} related questions, Please refer runtimeExcelData tool. {summary}. Display the response only in the format as mentioned in the tool description. " run_time_excel_tool = StructuredTool( func=chat_with_excel_data_dataframe, name="runtimeExcelData", args_schema=QueryInput, output_schema=QueryOutput, description=f"""You are an AI assistant trained to handle Excel data and return meaningful insights. If user query is given with an option of generating the document with the result set dataframe, pass two inputs to the tool. First input is the user query and the second input will be the phrase "create document". display the response only in the below format. {docstatus}. Here is the sample data: {sample_table}. Please provide the total rows count from the {total_rows} values returned by the function and not the count of sample table rows. If there is any error, please display the message returned by the function as response. """ ) # Add the new tool to the beginning tools.insert(0, run_time_excel_tool) prompt_template = f"""You are an assistant that helps with database queries, API information, and document retrieval. Your job is to provide clear, complete, and detailed responses to the following queries. Please give the output response in an user friendly way and remove "**" from the response. For example, document related queries can be answered in a clear and concise way with numbering and not as a paragraph. Database related queries should be answered with proper indentation and use numbering for the rows. ASN id related queries should be answered with proper indentation and use numbering for the rows. {run_time_excel_tool_summary} For ASN id related questions, if the user specifies an ASN id, provide the information from the api tool. Pass only the id as input to the tool. Do not pass the entire question as input to the tool. If the details are not found, say it in a clear and concise way. You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. When answering, focus on providing actionable insights and clear explanations related to the specific query. Please remove "**" from the response. For SQL database-related questions, only use the fields available in the warehouse schema, including tables such as customer_master, efs_company_master, efs_group_company_master, efs_region_master, party_address_detail, wms_warehouse_master. For datavisualization, user will ask for inventory report of a particular warehouse. Your job is to return the image path to chat interface and display the image as output. {{agent_scratchpad}} Here is the information you need to process: Question: {{input}}""" agent_executor = bind_llm(llm,tools,prompt_template) return f"File has been added successfully." def process_excel(file): global excel_dataframe # Check if the file is None if file is None: return "Excel file", "Your excel does not have values. Please upload a different file." # Return an empty dataframe if no file is uploaded else: # Read the uploaded Excel file excel_dataframe = pd.read_excel(file.name, sheet_name=None) # 'file.name' to get the actual file path #to get title and summary of excel file title, prompt = generate_prompt_from_excel_file(excel_dataframe) excel_dataframe = pd.read_excel(file.name) return title, prompt # Return the success message. def chat_with_excel_data(question): global excel_dataframe response_dataframe = chat_with_llm(excel_dataframe,question) print(response_dataframe) return response_dataframe def chat_with_excel_data_dataframe(question): isDataFrame = True print(f"question for excel data frame : {question}") if "LOA" in question: #question = question.replace("create document", "").strip() create_document = True else: create_document = False print(f"create document : {create_document}") response_dataframe = chat_with_excel_data(question) if isinstance(response_dataframe, pd.DataFrame) == False: print("The result is not a DataFrame.") if ":" in response_dataframe: isDataFrame = False names_part = response_dataframe.split(":", 1)[1] # Get everything after the colon and space # Split the names by commas to create a list names = names_part.split(",") # Convert the list of names to a DataFrame response_dataframe = pd.DataFrame(names, columns=["Result"]) #handle large dataset response = handle_large_dataset(response_dataframe, create_document,isDataFrame) return response #Save the respnse dataframe to an Excel file in hostinger so that the user can download it #save_file_path = "dataframe_output.xlsx" #response_dataframe.to_excel(save_file_path, index=False) #save_file_to_hostinger(save_file_path) # Check if the response is a DataFrame """if isinstance(response_dataframe, pd.DataFrame): # Convert DataFrame to HTML for display df_html = response_dataframe.to_html(classes='dataframe', index=False) print(f"dfhtml:{df_html}") return df_html""" #return response_dataframe.head(10)#, len(response_dataframe) def save_file_to_hostinger(save_file_path): from ftplib import FTP # Step 2: FTP server credentials ftp_host = 'ftp.redmindtechnologies.com' # Replace with your FTP server address ftp_user = 'u852023448.redmindGpt' # Replace with your FTP username ftp_pass = 'RedMind@505' # Replace with your FTP password remote_file_path = '/RedMindGPT/output.xlsx' # Replace with the desired path on the server # Create an FTP connection ftp = FTP(ftp_host) ftp.login(ftp_user, ftp_pass) # Open the local file and upload it to the server with open(save_file_path, 'rb') as file: ftp.storbinary(f'STOR {remote_file_path}', file) print(f'File {save_file_path} uploaded to {remote_file_path} on server.') # Close the FTP connection ftp.quit() def handle_large_dataset(df, create_document,isDataFrame): total_rows = len(df) #print(df) print(f"Total rows: {total_rows}") docstatus = f"Download the complete dataset here..There are total of {total_rows} rows." #docstatus = f"Download the complete dataset here..There are total of {total_rows} rows." if total_rows < 4000: # 1. Limit to first 10 rows # 2. Handle missing values #limited_data.fillna("N/A", inplace=True) # 3. Drop the original first column if len(df.columns) > 1: # Skipping the original first column limited_data = df.head(3) limited_data_without_first_column = limited_data.iloc[:, 1:] else: limited_data = df.head(20) limited_data_without_first_column = limited_data #print( "range "+ len(limited_data_without_first_column)) # 4. Add SNo (serial number) as the first column, starting from 1 if isDataFrame : limited_data_without_first_column.insert(0, 'SNo', range(1, len(limited_data_without_first_column) + 1)) else: limited_data_without_first_column.insert(0, 'SNo', range(1, len(limited_data) + 1)) # 3. Save the full dataset to a downloadable file import os # Get the current working directory current_folder = os.getcwd() file_path = os.path.join(current_folder, 'output_data.xlsx') df.to_excel(file_path, index=False) files = os.listdir(current_folder) print(f"Files in persistent storage: {files}") print(f"The current folder is: {current_folder}") """from huggingface_hub import Repository repo = Repository( local_dir="./", repo_type="space", repo_id="Redmind/NewageNXTGPT", use_auth_token=os.getenv("HF_TOKEN"), )""" file_path = "output_data.xlsx" #download_url = repo.get_download_url(file_path) from huggingface_hub import upload_file # Upload file to the Hugging Face Hub repo_id = "Redmind/NewageNXTGPT" #file_path = "/app/example.txt" # Path to the file to upload from huggingface_hub import login # Login to Hugging Face Hub login(token=os.getenv("HF_TOKEN")) from huggingface_hub import HfApi api = HfApi() api.upload_file(path_or_fileobj=file_path, repo_id=repo_id, repo_type= "space", path_in_repo="data/output.xlsx") from huggingface_hub import hf_hub_url print(hf_hub_url( repo_id="Redmind/NewageNXTGPT", filename="data/output.xlsx" )) #print(f"Download the file here: {download_url}") #save_file_to_hostinger('output_data.xlsx') # 4. Create a summary and table of the first 10 rows for display #columns = list(df.columns) sample_table = limited_data_without_first_column.to_markdown() #print(sample_table) if create_document: #Logic to generate pdfs with employee name and account number for index, row in df.iterrows(): # Create a PDF for each row create_pdf(row['Account Name'], row['Account ID']) create_document = False docstatus += f" {total_rows} documents are created successfully." print(sample_table) # 5. Return the summary and downloadable link #return f""" #There are a total of {total_rows} rows. Please download the complete dataset here: Download. Here are the first 3 rows: #{sample_table} """ return sample_table, docstatus else: return "Your query returns a large dataset which is not supported in the current version. Please try a different query." def create_pdf(name,id): filled = FormWrapper("Goldman_LOA - Gold.pdf").fill( { "Title of Account": name, "Account Number": id, "Print Name and Title": name }, ) #output_file_name = f"documents\\{name}.pdf" output_file_name = f"{name}.pdf" with open(output_file_name, "wb+") as output: output.write(filled.read()) repo_id = "Redmind/NewageNXTGPT" file_output=f"data/{output_file_name}" from huggingface_hub import HfApi api = HfApi() api.upload_file(path_or_fileobj=output_file_name, repo_id=repo_id, repo_type= "space", path_in_repo=file_output) return f"{output_file_name} is created successfully." css = """ /* Example of custom button styling */ .gr-button { background-color: #6366f1; /* Change to your desired button color */ color: white; border-radius: 8px; /* Make the corners rounded */ border: none; padding: 10px 20px; font-size: 12px; cursor: pointer; } .gr-button:hover { background-color: #8a92f7; /* Darker shade on hover */ } .gr-buttonbig { background-color: #6366f1; /* Change to your desired button color */ color: white; border-radius: 8px; /* Make the corners rounded */ border: none; padding: 10px 20px; font-size: 14px; cursor: pointer; } .gr-buttonbig:hover { background-color: #8a92f7; /* Darker shade on hover */ } /* Customizing the Logout link to be on the right */ .logout-link { text-align: right; display: inline-block; width: 100%; } .logout-link a { color: #4A90E2; /* Link color */ text-decoration: none; font-size: 16px; } .chatbot_gpt { height: 600px !important; /* Adjust height as needed */ } .logout-link a:hover { text-decoration: underline; /* Underline on hover */ } .message-buttons-right{ display: none !important; } body, .gradio-container { margin: 0; padding: 0; } /* Styling the tab header with a blue background */ .gr-tab-header { background-color: #4A90E2; /* Blue background for the tab header */ padding: 10px; border-radius: 8px; color: white; font-size: 16px; } /* Styling the selected tab text color to be green */ .gr-tab-header .gr-tab-active { color: green; /* Change selected tab text to green */ } /* Keep non-selected tab text color white */ .gr-tab-header .gr-tab { color: white; } /* Custom CSS for reducing the size of the video element */ .video-player { width: 500px; /* Set a custom width for the video */ height: 350px; /* Set a custom height for the video */ margin: 0 auto; /* Center the video horizontally */ } """ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: gr.HTML("

RedMindGPT

") # Logout link styled as text link in the right corner gr.Markdown("") # Unified RedMindGPT Interface with gr.Row(): m = gr.Markdown() demo.load(update_message, None, m) # Buttons for sample queries with gr.Row(): sample_button = gr.Button("What are the details of ASN24091600002", elem_classes="gr-buttonbig") sample_button1 = gr.Button("What are the active warehouses available", elem_classes="gr-buttonbig") sample_button2 = gr.Button("Explain Pre-Receiving Yard Management", elem_classes="gr-buttonbig") sample_button3 = gr.Button("can you generate a histogram chart with item name and customer for warehouse WH1000001", elem_classes="gr-buttonbig") sample_button4 = gr.Button("Analyze item name & quantity for different customers in a stacked bar chart for the warehouse WH1000001 & send email to meetarun@gmail.com", elem_classes="gr-button") # Chatbot component with gr.Row(): chatbot = gr.Chatbot(label="Select any of the questions listed above to experience RedMindGPT in action.", elem_classes="chatbot_gpt") # Textbox for user questions with gr.Row(): with gr.Column(scale=1): message = gr.Textbox(show_label=False, container=False, placeholder="Please enter your question") with gr.Row(): feedback_textbox = gr.Textbox(visible=False, show_label=False, container=False, placeholder="Please enter your feedback.") submit_feedback_button = gr.Button("Submit Feedback", visible=False, elem_classes="gr-buttonbig") with gr.Column(scale=1): with gr.Row(): button = gr.Button("Submit", elem_id="submit", elem_classes="gr-buttonbig") stop_button = gr.Button("Stop", elem_classes="gr-buttonbig") # Rearranged to place Upload Doc and Upload Excel in the same row with gr.Row(): with gr.Column(scale=1): # File Upload Section gr.Markdown("**Add a document or Excel for natural language interaction.**") with gr.Column(scale=1): u = gr.UploadButton("Upload a doc/excel", file_count="single", elem_classes="gr-buttonbig") #excel_file = gr.UploadButton("Upload an excel", file_count="single", elem_classes="gr-buttonbig", file_types=[".xlsx", ".xls"]) with gr.Column(scale=1): add_button = gr.Button("Add to RedMindGPT", elem_classes="gr-buttonbig", visible=False) with gr.Row(): title_textbox = gr.Textbox(label="Title", visible=False) summary_textarea = gr.Textbox(label="Summary", lines=5, visible=False) output_message = gr.Markdown() # Markdown to display output message success_message = gr.Markdown() # Placeholder for messages # Moved function calling lines to the end stop_button.click(stop_processing, [chatbot], [chatbot]) button.click(handle_query, [message, chatbot], [chatbot]) message.submit(handle_query, [message, chatbot], [chatbot]) message.submit(lambda x: gr.update(value=""), None, [message], queue=False) button.click(lambda x: gr.update(value=''), [], [message]) chatbot.like(handle_dislike, None, outputs=[feedback_textbox, submit_feedback_button]) submit_feedback_button.click(submit_feedback, [feedback_textbox, chatbot], [chatbot, feedback_textbox, submit_feedback_button]) submit_feedback_button.click(lambda x: gr.update(value=''), [], [feedback_textbox]) sample_button.click(handle_query, [sample_button, chatbot], [chatbot]) sample_button1.click(handle_query, [sample_button1, chatbot], [chatbot]) sample_button2.click(handle_query, [sample_button2, chatbot], [chatbot]) sample_button3.click(handle_query, [sample_button3, chatbot], [chatbot]) sample_button4.click(handle_query, [sample_button4, chatbot], [chatbot]) u.upload(upload_file, u, [title_textbox, summary_textarea]) u.upload(lambda _: (gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)), None, [title_textbox, summary_textarea, add_button]) add_button.click(add_to_redmindgpt, [title_textbox, summary_textarea], output_message) add_button.click(lambda _: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)), None, [title_textbox, summary_textarea, add_button]) demo.launch(auth=[("lakshmi", "redmind"), ("arun", "redmind"), ("NewageGlobal", "Newage123$")], auth_message="RedMindGPT", inline=False)