"""Reggie — the Georgia Tech registration chatbot (Gradio front end).

Combines a local RAG index of GT FAQ documents, live GT APIs exposed through
OpenAI tool calling, and uploaded-file parsing (text / PDF / OCR'd images)
to answer registration questions in a chat UI.
"""

import json
import mimetypes
import os

import gradio as gr
import pytesseract
from openai import OpenAI
from PIL import Image
from PyPDF2 import PdfReader

from CanvasIntegration import CanvasAPI
from GTScheduler import get_enrollment_metadata
from MSCSSpecialization import get_class_requirements
from RAG import GT_RAG
from RMPInfoGrabber import fetch_professor_data

# The key must be supplied via the environment.  It is re-exported under the
# name the OpenAI SDK reads by default so any library code also finds it.
openai_key = os.environ['openaikey']
os.environ['OPENAI_API_KEY'] = openai_key

base_llm = OpenAI(api_key=openai_key)
my_rag = GT_RAG('RAG_cache')


def read_file_content(file_path):
    """Extract readable text from an uploaded file.

    Supports plain-text files, PDFs (via PyPDF2), and images (via Tesseract
    OCR).  Any other MIME type yields a placeholder string instead of raising.

    Args:
        file_path: Path to the uploaded file on disk.

    Returns:
        The extracted text, or an ``[Unsupported file type: ...]`` marker.
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type and mime_type.startswith('text'):
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    if mime_type == 'application/pdf':
        reader = PdfReader(file_path)
        # extract_text() may return None for pages with no text layer
        # (e.g. scanned pages) — guard with `or ""` and join once instead of
        # the original quadratic `content +=` loop, which would also raise
        # TypeError on a None page.
        return "".join(page.extract_text() or "" for page in reader.pages)
    if mime_type and mime_type.startswith('image'):
        return pytesseract.image_to_string(Image.open(file_path))
    return f"[Unsupported file type: {mime_type}]"


def truncate_filename(filename, max_length):
    """Shorten a filename's stem to ``max_length`` characters for display.

    The extension is always preserved; an ellipsis marks truncation.

    Args:
        filename: The original file name (with extension).
        max_length: Maximum number of stem characters to keep.

    Returns:
        The possibly-truncated display name.
    """
    name, ext = os.path.splitext(filename)
    if len(name) > max_length:
        name = name[:max_length] + '...'
    # Bug fix: the original returned f"{name} {ext}", inserting a stray
    # space before the extension (e.g. "syllabus .pdf").
    return f"{name}{ext}"


def build_prompt(rag_input, api_input, user_input, conversation_history, file_content=""):
    """Assemble the full system/user prompt sent to the LLM.

    Args:
        rag_input: Retrieved FAQ snippets from the RAG index.
        api_input: Text summarizing live GT API call results.
        user_input: The user's latest question.
        conversation_history: Flattened "User:/Bot:" transcript so far.
        file_content: Optional text extracted from uploaded files.

    Returns:
        The composed prompt string.
    """
    prompt = (
        "You will act as Reggie, the Georgia Tech Registration chatbot. "
        "You will be asked a variety of questions about registering for classes at Georgia Tech. "
        "You will keep your answers direct and relevant to the question asked. "
        "If the user asks you a question and you are unsure of the semantics or details of their question, "
        "please ask for clarification until you are confident in answering appropriately. "
        "Avoid providing information that is not relevant to the question asked. "
        "If you are unsure of the answer to a question, please ask for clarification or indicate that you are unsure. "
        "You may be evaluated on the quality of your responses, so please ensure that your responses are clear, concise, and accurate.\n\n"
        f"Answer the following question to the best of your ability:\n{user_input}\n"
        f"Utilize the following History of the Conversation:\n{conversation_history}\n"
        f"Utilize the following information from live Georgia Tech API's:\n{api_input}\n"
        f"Utilize the following information from Georgia Tech's FAQ documents. Be wary for noisy input:\n{rag_input}\n"
    )
    # Include the file content if available
    if file_content:
        prompt += f"\nAdditional information provided by the user:\n{file_content}\n"
    return prompt


def query_llm(llm, query):
    """Send a single-turn user message to the LLM and return its reply text.

    Args:
        llm: An ``openai.OpenAI`` client.
        query: The full prompt to send as the user message.

    Returns:
        The assistant's reply content string.
    """
    chat_completion = llm.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="gpt-3.5-turbo",
    )
    return chat_completion.choices[0].message.content


def build_display_history(history):
    """Convert internal history into Gradio ``(user, bot)`` tuple pairs.

    Entries with role ``'file'`` (raw uploaded-file text) are never shown.
    A bot message immediately following a user message is paired with it;
    standalone bot messages (e.g. the greeting) display with ``None`` as
    the user side.

    Args:
        history: List of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        List of ``(user_message_or_None, bot_message)`` tuples for
        ``gr.Chatbot``.
    """
    display_history = []
    i = 0
    while i < len(history):
        if history[i]["role"] == "user":
            user_message = history[i]["content"]
            # Check for the corresponding bot response
            bot_message = ""
            if i + 1 < len(history) and history[i + 1]["role"] == "bot":
                bot_message = history[i + 1]["content"]
                i += 1  # Skip the bot message in the next iteration
            display_history.append((user_message, bot_message))
        elif history[i]["role"] == "bot" and (i == 0 or history[i - 1]["role"] != "user"):
            # Initial greeting or any other standalone bot message
            display_history.append((None, history[i]["content"]))
        # Messages with role 'file' simply fall through and are skipped
        i += 1
    return display_history


def query_function_calls(llm, query):
    """Let the LLM pick and invoke live GT API tools for the user's query.

    Exposes RateMyProfessor lookups, enrollment metadata, and MSCS
    specialization requirements (Canvas tools are currently stubbed out).
    Executes every tool call the model requests and concatenates the
    results into one context string.

    Args:
        llm: An ``openai.OpenAI`` client.
        query: The raw user question.

    Returns:
        A "Relevant information from external API calls: ..." string, or
        ``""`` when the model requested no tools.
    """
    # JSON-schema tool definitions for OpenAI function calling.
    function_definitions = [
        {
            "name": "fetch_professor_data",
            "description": "Fetches professor data from RateMyProfessor",
            "parameters": {
                "type": "object",
                "properties": {
                    "professor_name": {
                        "type": "string",
                        "description": "Full name of the professor (e.g., 'John Smith')",
                    }
                },
                "required": ["professor_name"],
            },
        },
        {
            "name": "get_enrollment_metadata",
            "description": "Gets course registration data (such as waitlist and current people registered) from Georgia Tech's Registration Website based off of Course Registration Number (CRN)",
            "parameters": {
                "type": "object",
                "properties": {
                    "course_registration_number": {
                        "type": "string",
                        "description": "Course Registration Number (e.g., 234873)",
                    }
                },
                "required": ["course_registration_number"],
            },
        },
        {
            "name": "get_class_requirements",
            "description": "Gets course requirements from Georgia Tech's official website given a Masters' student's major and specialization",
            "parameters": {
                "type": "object",
                "properties": {
                    "major": {
                        "type": "string",
                        "description": "Major (e.g. 'Computer Science' or 'CS')",
                    },
                    "specialization": {
                        "type": "string",
                        "description": "Specialization (e.g. 'computational perception and robotics' or 'machine learning')",
                    },
                },
                "required": ["major", "specialization"],
            },
        },
        {
            "name": "get_course_assignments",
            "description": "Gets assignments for a course based on the user query",
            "parameters": {
                "type": "object",
                "properties": {
                    "assignment_type": {
                        "type": "string",
                        "description": '''Assignment type depending on due date and submission status. 
'Past': 'Old Assignment', 'Overdue': 'Past due date', 'Undated': 'No due date', 'Ungraded': 'Not graded', 'Unsubmitted': 'Not submitted', 'Upcoming': 'Due in the future', ''',
                        "enum": ['past', 'overdue', 'undated', 'ungraded', 'unsubmitted', 'upcoming'],
                    },
                    "course_name": {
                        "type": "string",
                        "description": "Name of the course",
                    },
                },
                "required": ["course_name", "assignment_type"],
            },
        },
        {
            "name": "get_grades",
            "description": "Gets grades for an assignment of a course",
            "parameters": {
                "type": "object",
                "properties": {
                    "course_name": {
                        "type": "string",
                        "description": "Name of the course",
                    },
                    "assignment_name": {
                        "type": "string",
                        "description": "Name of the assignment",
                    },
                },
                "required": ["course_name", "assignment_name"],
            },
        },
        {
            "name": "get_assignment_details",
            "description": "Gets details for an assignment of a course",
            "parameters": {
                "type": "object",
                "properties": {
                    "course_name": {
                        "type": "string",
                        "description": "Name of the course",
                    },
                    "assignment_name": {
                        "type": "string",
                        "description": "Name of the assignment",
                    },
                },
                "required": ["course_name", "assignment_name"],
            },
        },
    ]

    tools = [{"type": "function", "function": fdef} for fdef in function_definitions]
    response = llm.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="gpt-3.5-turbo",
        tools=tools,
    )

    if response.choices[0].message.tool_calls is None:
        return ""

    function_outputs = []
    for tool_call in response.choices[0].message.tool_calls:
        fname = tool_call.function.name
        # Parse the arguments once; the original re-parsed them in three
        # of the branches below.
        fargs = json.loads(tool_call.function.arguments)
        function_response = ""
        if fname == "fetch_professor_data":
            professor_name = fargs['professor_name']
            school_name = 'Georgia Institute of Technology'
            last_name = professor_name.strip().split()[-1]
            function_response = fetch_professor_data(school_name, last_name, professor_name)
        elif fname == "get_enrollment_metadata":
            function_response = get_enrollment_metadata(fargs["course_registration_number"])
        elif fname == "get_class_requirements":
            function_response = get_class_requirements(fargs["major"], fargs["specialization"])
        elif fname == "get_grades":
            # Canvas integration disabled:
            # canvas_api.get_grades(fargs['course_name'], fargs['assignment_name'])
            function_response = "Unable to find grade information"
        elif fname == "get_course_assignments":
            fargs.setdefault('assignment_type', 'ungraded')
            # Canvas integration disabled:
            # canvas_api.get_course_assignments(fargs['course_name'], fargs['assignment_type'])
            function_response = "Unable to find course information"
        elif fname == "get_assignment_details":
            # Canvas integration disabled:
            # canvas_api.get_assignment_details(fargs['course_name'], fargs['assignment_name'])
            function_response = "Unable to find assignment information"
        function_outputs.append(function_response)

    # Preserve the original fallback: even with no usable outputs the
    # header line is still returned (original also had a dead `is None`
    # check on this locally-built list — removed).
    if not function_outputs:
        function_outputs = [""]
    return f"Relevant information from external API calls: {'. '.join(function_outputs)}"


def generate_response(history):
    """Produce the bot's next reply and append it to the history.

    Two paths: if the latest user message is a file-upload notification,
    the most recent uploaded file is checked for being a syllabus and
    summarized; otherwise the normal RAG + tool-calling pipeline runs.

    Args:
        history: Internal message history (roles: 'user', 'bot', 'file').

    Returns:
        Tuple of (display history for ``gr.Chatbot``, updated history).
    """
    # Get the latest user message
    user_input = history[-1]["content"]

    if user_input.startswith("Successful File Upload:"):
        # Syllabus path: find the last 'file' entry in history.
        file_content = None
        for item in reversed(history):
            if item["role"] == "file":
                file_content = item["content"]
                break

        if file_content is None:
            bot_response = "An error occurred: No file content found."
            history.append({"role": "bot", "content": bot_response})
            return build_display_history(history), history

        # Special prompt asking the model to classify/summarize the syllabus.
        prompt = (
            "You are to determine if the following file contents are a syllabus or not. "
            "If the file contents are a syllabus, please output the following message: "
            "'It appears you uploaded the syllabus for the class {class number and name from the syllabus} "
            "here is some key information from this document: "
            "Class Name: {Class Number and Name} \n"
            "Professor: {Professor} \n"
            "Class Contents: {2 sentence summary of the contents of the class} \n"
            "Class Time: {Days of the week and times for the class lectures} \n"
            "Location: {Location of the class} \n"
            "Grade Breakdown: {Grade percentage distribution} \n"
            "Please ask any additional questions you may have about this class!'. "
            "If any of the aforementioned information is not available, please simply put 'Unknown' in the unknown field. "
            "If the file isn't a syllabus you should simply return 'The uploaded file is either not a syllabus or has some other issue, please try again.'\n"
            f"The file contents are:\n{file_content}"
        )
        response = query_llm(base_llm, prompt)
        history.append({"role": "bot", "content": response})
        return build_display_history(history), history

    # Normal Q&A path: live API tool calls + RAG retrieval.
    api_call_data = query_function_calls(base_llm, user_input)

    # Collect all file contents from the history
    file_contents = [item["content"] for item in history if item["role"] == "file"]
    combined_file_content = "\n".join(file_contents) if file_contents else ""

    # Build conversation history excluding 'file' messages and upload notices
    conversation_history = ""
    for item in history:
        if item["role"] == "user" and not item["content"].startswith("Successful File Upload:"):
            conversation_history += f"User: {item['content']}\n"
        elif item["role"] == "bot":
            conversation_history += f"Bot: {item['content']}\n"

    rag_data = my_rag.query(user_input, k=3)
    query = build_prompt(rag_data, api_call_data, user_input, conversation_history, combined_file_content)
    response = query_llm(base_llm, query)
    history.append({"role": "bot", "content": response})
    return build_display_history(history), history


def add_message(history, message):
    """Record a multimodal submission (files and/or text) into history.

    Each uploaded file is parsed immediately and stored under the hidden
    role ``'file'``, followed by a visible upload-confirmation message.

    Args:
        history: Internal message history list (created if not a list).
        message: Gradio MultimodalTextbox value: ``{"text": ..., "files": [...]}``.

    Returns:
        Tuple of (updated history, ``gr.update`` clearing the textbox).
    """
    if not isinstance(history, list):
        history = []

    # Handle file uploads
    for file_path in message["files"]:
        file_content = read_file_content(file_path)
        # Store the raw file content under a hidden role
        history.append({"role": "file", "content": file_content})
        # Visible confirmation message with a truncated display name
        file_name = os.path.basename(file_path)
        truncated_name = truncate_filename(file_name, 20)
        user_message = f"Successful File Upload: {truncated_name}"
        history.append({"role": "user", "content": user_message})

    # Handle text input
    if message["text"] is not None and message["text"].strip() != '':
        history.append({"role": "user", "content": message["text"]})

    return history, gr.update(value=None)  # Clear the textbox


if __name__ == '__main__':
    theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="slate",
    )

    # Initial bot message
    initial_bot_message = (
        "Hello, I am Reggie, the Georgia Tech Registration chatbot. "
        "I am here to help you with any questions you may have about registering for classes at Georgia Tech. "
        "How may I assist you today?"
    )

    # Initialize history as a list of dictionaries
    initial_history = [{"role": "bot", "content": initial_bot_message}]

    # Create the Gradio interface
    with gr.Blocks(theme=theme) as demo:
        # Header row: logo plus title and description
        with gr.Row():
            gr.Image(
                value="Georgia_Tech_Buzz_logo.png",
                width=200,
                height=200,
                interactive=False,
                show_label=False,
                show_download_button=False,
            )
            with gr.Column():
                gr.Markdown("# REGGIE - Registration Information Chatbot")
                gr.Markdown(
                    "Reggie is a chatbot aimed to help students register at GT. It can provide useful information, links, and insight into registering for classes at Georgia Tech."
                )
                gr.Markdown(
                    "This project was created by Arjun Verma, Ethan Haarer, Jai Jain, and Ege Gunal."
                )

        # Chatbot seeded with the greeting; None is the user side of the pair
        chatbot = gr.Chatbot(value=[(None, initial_bot_message)], height=800)
        state = gr.State(initial_history)

        txt = gr.MultimodalTextbox(
            interactive=True,
            file_count="multiple",
            placeholder="Enter message or upload syllabus...",
            show_label=False,
        )

        # On submit: record the message, clear the textbox, then respond
        chat_msg = txt.submit(add_message, [state, txt], [state, txt])
        bot_msg = chat_msg.then(generate_response, state, [chatbot, state])

    # Launch the Gradio app
    demo.launch()