Spaces:

zlmqi
/

mis

Sleeping

mis

File size: 8,527 Bytes

f941cf3
 
 
6dce92d
 
5eeb019
6dce92d
5eeb019
e5e2605
27ac14f
6dce92d
b91f436
5242e1e
 
 
 
 
 
 
252481a
 
e4f4106
3b8a5dd
fac2a29
3b8a5dd
577ae7c
252481a
5242e1e
 
 
 
 
6dce92d
 
 
 
f941cf3
5242e1e
252481a
6dce92d
5eeb019
 
5242e1e
b91f436
5d0c781
b91f436
ec98573
 
3969fe1
 
 
 
ec98573
 
5eeb019
 
f941cf3
5242e1e
6dce92d
 
 
 
 
 
f941cf3
57d9507
5242e1e
 
 
 
 
 
5d0c781
f941cf3
4dd75c6
 
ec98573
f941cf3
e94cac0
292ea38
61e9b9a
 
2ee4558
61e9b9a
f941cf3
b91f436
57d9507
 
 
 
 
 
 
 
 
6dce92d
 
 
 
 
 
 
ed60c05
 
6dce92d
5242e1e
 
 
 
 
 
c262e5a
0b7c2d1
 
 
 
 
 
5242e1e
 
6dce92d
292ea38
5242e1e
b91f436
6dce92d
5eeb019
 
6dce92d
5242e1e
 
 
5eeb019
67ea930
5eeb019
 
5242e1e
 
 
 
 
 
e94cac0
0b7c2d1
d32986c
d0c4f33
5242e1e
f941cf3
 
5eeb019
 
 
 
 
 
 
 
 
 
 
f941cf3
57d9507
f941cf3
 
 
 
 
57d9507
 
 
 
 
 
 
f941cf3
 
 
 
57d9507
814650a
 
6e5219f
2274f21
7fc68bf
3969fe1
75dfc1c
2274f21
f941cf3
 
 
 
 
 
 
 
 
 
 
 
 
5242e1e
 
 
 
 
 
 
f941cf3
1bd304f
57d9507
5242e1e
f941cf3
67fca87
5242e1e
 
 
ea313d8
9139997
5242e1e
 
 
e5e2605
5736da2
5242e1e
 
3587122
827a707
63cb614
2e360ff
3587122
 
ee34f3d
 
fd43f35
ee34f3d
 
 
 
 
ddf5e89
ee34f3d
e5e2605

#12/27/2024
# add Chatstore to keep chat memory for different users

# 12/25/2024
# add LlamaIndex ChatMemoryBuffer to keep the conversation going
# (does not track individual users) add a unique id and sequence number to keep track of a user session
# upgrade llama-index to version 0.10: migrate from ServiceContext to Settings
# use socket and gradio Request to get client ip
# upgrade gradio and use the new ChatInterface

# 5/1/2024
# This version added saving chat history to a log file (needs to persist data from a Space to a dataset)
# Updated the GPT model to gpt-4
# Add timestamp and ip address

# 2/23/2024
# This version uses different method in llama index to define llm model
# Removed deprecated classes and replaced with newest dependencies

# Start by setting token and debug mode before starting schedulers
import os
from huggingface_hub import logging, login

# The access token must be saved in the secrets of this space first
login(token=os.getenv("new_data_token"), write_permission=True)
#logging.set_verbosity_debug()

import openai
import json
import gradio as gr
from openai import OpenAI

from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
from llama_index.core.storage.chat_store import SimpleChatStore

# add datetime and ip to the log file
from datetime import datetime
#import random
import socket;

# access data folder of persistent storage
from pathlib import Path
from huggingface_hub import CommitScheduler
from uuid import uuid4

# use HfFileSystem interface to HF hub
from huggingface_hub import HfFileSystem
fs = HfFileSystem()

# generate a unique identifier when this module is loaded; it is used in the
# chat log file name (chat_log_<session_id>.json) built in Chatbot.__init__
session_id = uuid4()

# global variables
# client_ip is overwritten by Chatbot.get_client_ip on every request
client_ip = ""
# load the previously persisted index from the current directory
storage_context = StorageContext.from_defaults(persist_dir='./')
index = load_index_from_storage(storage_context)
# NOTE(review): this module-level buffer is not referenced by the code in this
# file; Chatbot.generate_response builds its own per-user ChatMemoryBuffer
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
# use the new global Settings object (llama-index 0.12)
# OpenAI here is llama_index.llms.openai.OpenAI: that import comes after
# `from openai import OpenAI` and rebinds the name
Settings.llm = OpenAI(temperature=0.5, model="gpt-4")
#Settings.num_output = 512
#Settings.context_window = 3900
# in-memory chat store shared by all requests; messages are keyed per user id
chat_store = SimpleChatStore()
# location used with the HfFileSystem `fs` object when reading/writing
# per-user chat stores
user_chat_dir = Path("datasets/zlmqi/history_data/chat_store_mis")

class Chatbot:
    """Answer a user message with the indexed course content and log the exchange.

    The class relies on several module-level objects: ``session_id``,
    ``chat_store``, ``user_chat_dir``, ``fs``, ``Settings`` and the mutable
    ``client_ip`` string.
    """

    # CommitScheduler starts a background upload thread when it is created.
    # A new Chatbot is built for every incoming message, so the scheduler is
    # created once and shared; otherwise every message would add a thread.
    _shared_scheduler = None

    def __init__(self, api_key, index):
        """Store the index, set the OpenAI key and prepare the chat log.

        Args:
            api_key: OpenAI API key, assigned to ``openai.api_key``.
            index: LlamaIndex index used to build the chat engine.
        """
        self.index = index
        openai.api_key = api_key
        self.chat_history = []

        # set chat log data path in data folder (persistent storage)
        dataset_dir = Path("logs")
        dataset_dir.mkdir(parents=True, exist_ok=True)
        self.dataset_path = dataset_dir / f"chat_log_{session_id}.json"

        # set chat log scheduler (shared by all instances, see class comment)
        if Chatbot._shared_scheduler is None:
            Chatbot._shared_scheduler = CommitScheduler(
                repo_id="history_data",
                repo_type="dataset",
                folder_path=dataset_dir,
                path_in_repo="data_mis",
            )
        self.scheduler = Chatbot._shared_scheduler

    def generate_response(self, user_input, user_id):
        """Run one chat turn for ``user_id`` and return the assistant message.

        Args:
            user_input: Text typed by the user.
            user_id: Key under which this user's messages are kept in the
                module-level ``chat_store``.

        Returns:
            A dict ``{"role": "assistant", "content": <reply text>}``.
        """
        # use chat_store to store chat history for each user (client_ip)
        user_memory = ChatMemoryBuffer.from_defaults(
            token_limit=1500,
            chat_store=chat_store,
            chat_store_key=user_id,
        )

        # use the index handed to the constructor rather than the global one
        chat_engine = self.index.as_chat_engine(
            chat_mode="context",
            llm=Settings.llm,
            memory=user_memory,
            system_prompt=(
                "You are a chatbot and learning assistant, able to have normal interaction, as well as"
                " answer questions specific to the context."
            ),
        )
        response = chat_engine.chat(user_input)

        return {"role": "assistant", "content": response.response}

    def append_chat_history(self, user_input, output):
        """Add one timestamped user/assistant exchange to ``self.chat_history``.

        The client address is read from the module-level ``client_ip``, so
        ``get_client_ip`` should be called first.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # save the data in dictionary format
        self.chat_history.append(
            {
                "datetime": timestamp,
                "client_ip": client_ip,
                "user": user_input,
                "assistant": output,
            }
        )

    def save_chat_history(self):
        """Append ``self.chat_history`` to the log file as one JSON line.

        The scheduler lock is held while writing so that a commit does not
        pick up a half-written file.
        """
        with self.scheduler.lock:
            with self.dataset_path.open("a") as f:
                json.dump(self.chat_history, f)
                f.write("\n")

    def get_client_ip(self, request):
        """Resolve the caller's address and store it in the global ``client_ip``.

        The first entry of the ``x-forwarded-for`` header is preferred (the
        header may hold a comma-separated proxy chain); otherwise the direct
        peer address is used.

        Args:
            request: Object exposing ``headers`` (mapping with ``get``) and
                ``client`` (``None`` or an object with ``host``).

        Returns:
            The resolved address (also stored in ``client_ip``); an empty
            string when nothing is available.
        """
        global client_ip

        # request.client can be missing; do not fail the whole chat turn
        peer = request.client
        direct_host = peer.host if peer else ""

        forwarded = request.headers.get("x-forwarded-for")
        first_hop = forwarded.split(",")[0].strip() if forwarded else ""

        client_ip = first_hop or direct_host
        return client_ip

    # load user chat store if available
    def load_user_chat_store(self, user_id):
        """Replace the global ``chat_store`` with the one saved for ``user_id``.

        The file name matches the one written by ``save_user_chat_store``.
        ``SimpleChatStore.from_persist_path`` takes a path plus an optional
        filesystem; it is expected to return an empty store when the file
        does not exist, which resets the store for a new user.
        """
        global chat_store
        user_chat_path = user_chat_dir / f"chat_store_{user_id}.json"
        chat_store = SimpleChatStore.from_persist_path(
            user_chat_path.as_posix(), fs=fs
        )

    # create a chat store and use it as chat memory for each user, use client_ip as the unique id
    def save_user_chat_store(self, user_id):
        """Write the global ``chat_store`` to ``chat_store_<user_id>.json``.

        The file is written through the ``fs`` filesystem object, so no local
        directory needs to be created. The whole store is serialised, i.e.
        every key it currently holds, not only ``user_id``.
        """
        user_chat_path = user_chat_dir / f"chat_store_{user_id}.json"

        # chat_store.json() is already a JSON string; dumping it again matches
        # the format that SimpleChatStore.from_persist_path reads back
        with fs.open(user_chat_path.as_posix(), "w") as f:
            json.dump(chat_store.json(), f)
        
        
def create_bot(message, history, request: gr.Request):
    """Handle one chat message for ``gr.ChatInterface`` and return the reply text.

    The message is checked with the OpenAI moderation endpoint; if it is not
    flagged it is answered through a ``Chatbot`` and the user's chat store is
    saved. Every handled exchange is appended to the chat log.

    Args:
        message: Text submitted by the user.
        history: Conversation so far as supplied by Gradio (not used here;
            per-user memory lives in the chat store).
        request: Gradio request, used to read the client address.

    Returns:
        The assistant reply, ``"Invalid request."`` for flagged input, or a
        short prompt when the message is empty.
    """
    user_input = message

    # Always hand a string back to the chat UI, and skip building the bot
    # (and calling the APIs) when there is nothing to answer.
    if not user_input:
        return "Please enter a question."

    bot = Chatbot(os.getenv("OPENAI_API_KEY"), index=index)
    bot.get_client_ip(request)

    # convert client ip to a user id; keep a separator between the parts so
    # that different addresses (e.g. 1.23.4.5 and 12.3.4.5) cannot collapse
    # into the same id, and make IPv6 colons file-name safe as well
    user_id = client_ip.replace(".", "_").replace(":", "_")

    # use moderations endpoint to check input
    moderation_client = openai.OpenAI()
    moderation = moderation_client.moderations.create(input=user_input)
    flagged = moderation.results[0].flagged

    if flagged:
        output = "Invalid request."
    else:
        response_bot = bot.generate_response(user_input, user_id)
        output = response_bot['content']
        # save chat store for individual user
        bot.save_user_chat_store(user_id)

    bot.append_chat_history(user_input, output)
    bot.save_chat_history()

    return output

'''
inputs = gr.components.Textbox(lines=7, label="Ask the AI chatbot any questions that you have related to the course and the subject content.")
outputs = gr.components.Textbox(label="Response")


gr.Interface(fn=create_bot, inputs=inputs, outputs=outputs, title="AI Assistant",
             description="This is a virtual learning assistant designed for MIS course (Beta version 2.0, powered by GPT-4).\nNote: Chatbot can make mistakes. Please double-check important information."
            ).launch(share=True)
          
'''
# Build the chat UI around create_bot and start serving it as soon as the
# module is executed.
gr.ChatInterface(
    create_bot,  # called for every submitted message
    type="messages",
    chatbot=gr.Chatbot(height=300),
    #textbox=gr.Textbox(container=False, scale=7),
    title="Educational Chatbot",
    description="Ask the AI chatbot any questions related to the course or subject",
    theme="Ocean",
    # sample prompts offered to the user in the UI
    examples=["Tell me about the course.","how many assignments do I have?","Tell me an IT joke."],
).launch(share=True)