Spaces:
Sleeping
Sleeping
File size: 8,527 Bytes
f941cf3 6dce92d 5eeb019 6dce92d 5eeb019 e5e2605 27ac14f 6dce92d b91f436 5242e1e 252481a e4f4106 3b8a5dd fac2a29 3b8a5dd 577ae7c 252481a 5242e1e 6dce92d f941cf3 5242e1e 252481a 6dce92d 5eeb019 5242e1e b91f436 5d0c781 b91f436 ec98573 3969fe1 ec98573 5eeb019 f941cf3 5242e1e 6dce92d f941cf3 57d9507 5242e1e 5d0c781 f941cf3 4dd75c6 ec98573 f941cf3 e94cac0 292ea38 61e9b9a 2ee4558 61e9b9a f941cf3 b91f436 57d9507 6dce92d ed60c05 6dce92d 5242e1e c262e5a 0b7c2d1 5242e1e 6dce92d 292ea38 5242e1e b91f436 6dce92d 5eeb019 6dce92d 5242e1e 5eeb019 67ea930 5eeb019 5242e1e e94cac0 0b7c2d1 d32986c d0c4f33 5242e1e f941cf3 5eeb019 f941cf3 57d9507 f941cf3 57d9507 f941cf3 57d9507 814650a 6e5219f 2274f21 7fc68bf 3969fe1 75dfc1c 2274f21 f941cf3 5242e1e f941cf3 1bd304f 57d9507 5242e1e f941cf3 67fca87 5242e1e ea313d8 9139997 5242e1e e5e2605 5736da2 5242e1e 3587122 827a707 63cb614 2e360ff 3587122 ee34f3d fd43f35 ee34f3d ddf5e89 ee34f3d e5e2605 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
#12/27/2024
# add Chatstore to keep chat memory for different users
# 12/25/2024
# add LlamaIndex ChatMemoryBuffer to keep the conversation going
# (does not track individual users) add a unique id and sequence number to keep track of a user session
# upgrade llama-index to version 0.10: migrate from ServiceContext to Settings
# use socket and gradio Request to get client ip
# upgrade gradio and use the new ChatInterface
# 5/1/2024
# This version added saving chat history to a log file (need persist data from a space to a dataset)
# Updated the GPT model to gpt-4
# Add timestamp and ip address
# 2/23/2024
# This version uses different method in llama index to define llm model
# Removed deprecated classes and replaced with newest dependencies
# Start by setting token and debug mode before starting schedulers
import os
from huggingface_hub import logging, login
# The access token must be saved in the secrets of this space first
login(token=os.getenv("new_data_token"), write_permission=True)
#logging.set_verbosity_debug()
import openai
import json
import gradio as gr
from openai import OpenAI
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.core.memory.chat_memory_buffer import ChatMemoryBuffer
from llama_index.core.storage.chat_store import SimpleChatStore
# add datetime and ip to the log file
from datetime import datetime
#import random
import socket;
# access data folder of persistent storage
from pathlib import Path
from huggingface_hub import CommitScheduler
from uuid import uuid4
# use HfFileSystem interface to HF hub
from huggingface_hub import HfFileSystem
fs = HfFileSystem()
# Generate a unique identifier once per process; it is embedded in the chat
# log file name built in Chatbot.__init__.
session_id = uuid4()
# global variables
# Address of the most recent client; overwritten by Chatbot.get_client_ip.
client_ip = ""
# Load the persisted LlamaIndex index from the current working directory.
storage_context = StorageContext.from_defaults(persist_dir='./')
index = load_index_from_storage(storage_context)
# NOTE(review): this module-level buffer is not referenced elsewhere in the
# visible code (generate_response builds its own per-user buffer) -- confirm
# before removing.
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
# use the new global Settings object (llama-index 0.12)
Settings.llm = OpenAI(temperature=0.5, model="gpt-4")
#Settings.num_output = 512
#Settings.context_window = 3900
# In-memory chat store shared by all requests; entries are keyed by user id
# (see Chatbot.generate_response).
chat_store = SimpleChatStore()
# Location of the per-user chat store files; opened through the HfFileSystem
# instance `fs`.
user_chat_dir = Path("datasets/zlmqi/history_data/chat_store_mis")
class Chatbot:
    """Answer one chat message and record the exchange.

    An instance holds the index used for retrieval, the list of exchanges
    recorded so far (``chat_history``), the path of the local JSON log file
    and the ``CommitScheduler`` that uploads the log folder to the dataset
    repository.
    """

    # One scheduler shared by every instance.  A Chatbot is built per chat
    # message, so creating a scheduler in each __init__ would pile up one
    # scheduler per message and give each save its own, unrelated lock.
    _scheduler = None

    def __init__(self, api_key, index):
        """Store the index, set the OpenAI key and prepare the chat log.

        Args:
            api_key: OpenAI API key; assigned to the module-wide
                ``openai.api_key``.
            index: index object providing ``as_chat_engine``.
        """
        self.index = index
        openai.api_key = api_key
        self.chat_history = []
        # set chat log data path in data folder (persistent storage)
        dataset_dir = Path("logs")
        dataset_dir.mkdir(parents=True, exist_ok=True)
        self.dataset_path = dataset_dir / f"chat_log_{session_id}.json"
        # set chat log scheduler (created on first use, then reused)
        if Chatbot._scheduler is None:
            Chatbot._scheduler = CommitScheduler(
                repo_id="history_data",
                repo_type="dataset",
                folder_path=dataset_dir,
                path_in_repo="data_mis",
            )
        self.scheduler = Chatbot._scheduler

    def generate_response(self, user_input, user_id):
        """Ask the chat engine for a reply to ``user_input``.

        Args:
            user_input: the user's message text.
            user_id: key under which this user's history is kept in the
                module-level ``chat_store``.

        Returns:
            dict with keys ``"role"`` (always ``"assistant"``) and
            ``"content"`` (the reply text).
        """
        # use chat_store to store chat history for each user (client_ip)
        memory = ChatMemoryBuffer.from_defaults(
            token_limit=1500,
            chat_store=chat_store,
            chat_store_key=user_id,
        )
        # Use the index handed to the constructor rather than the global.
        chat_engine = self.index.as_chat_engine(
            chat_mode="context",
            llm=Settings.llm,
            memory=memory,
            system_prompt=(
                "You are a chatbot and learning assistant, able to have normal interaction, as well as"
                " answer questions specific to the context."
            ),
        )
        response = chat_engine.chat(user_input)
        return {"role": "assistant", "content": response.response}

    def append_chat_history(self, user_input, output):
        """Append one exchange to ``self.chat_history``.

        The record holds the current local time, the module-level
        ``client_ip``, the user's text and the assistant's text.
        """
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # save the data in dictionary format
        self.chat_history.append(
            {
                "datetime": dt,
                "client_ip": client_ip,
                "user": user_input,
                "assistant": output,
            }
        )

    def save_chat_history(self):
        """Append ``self.chat_history`` as one JSON line to the log file."""
        # Hold the scheduler's lock while writing to the log file.
        with self.scheduler.lock:
            with self.dataset_path.open("a") as f:
                json.dump(self.chat_history, f)
                f.write("\n")

    def get_client_ip(self, request):
        """Work out the caller's address and store it in global ``client_ip``.

        The first entry of the ``x-forwarded-for`` header is preferred when
        the header is present; otherwise the peer address of the request is
        used.

        Args:
            request: object exposing ``client`` (with a ``host`` attribute,
                or ``None``) and a ``headers`` mapping.

        Returns:
            The address that was stored (empty string if none is available).
        """
        global client_ip
        peer = request.client
        address = peer.host if peer is not None else ""
        x_forwarded_for = request.headers.get('x-forwarded-for')
        if x_forwarded_for:
            # The header may list several hops ("client, proxy1, proxy2");
            # keep the first one and drop surrounding whitespace.
            first_hop = x_forwarded_for.split(",", 1)[0].strip()
            address = first_hop or address
        client_ip = address
        return address

    # load user chat store if available
    def load_user_chat_store(self, user_id):
        """Replace the global ``chat_store`` with the user's persisted store.

        Reads the same file name that ``save_user_chat_store`` writes.  When
        no file exists yet the global store is reset to an empty one.
        """
        global chat_store
        user_chat_path = user_chat_dir / f"chat_store_{user_id}.json"
        # from_persist_path takes a path plus an fsspec filesystem and
        # returns an empty store when the path does not exist.
        chat_store = SimpleChatStore.from_persist_path(
            user_chat_path.as_posix(), fs=fs
        )

    # create a chat store and use it as chat memory for each user, use client_ip as the unique id
    def save_user_chat_store(self, user_id):
        """Write the global ``chat_store`` to the user's file through ``fs``."""
        # No local mkdir here: the file is written through the HfFileSystem
        # instance, not to the local disk.
        user_chat_path = user_chat_dir / f"chat_store_{user_id}.json"
        with fs.open(user_chat_path.as_posix(), "w") as f:
            # chat_store.json() is already a JSON string, so the file holds a
            # JSON-encoded string.  NOTE(review): this appears to match what
            # SimpleChatStore.persist writes -- confirm for the installed
            # llama-index version.
            json.dump(chat_store.json(), f)
def create_bot(message, history, request: gr.Request):
    """Chat callback: moderate the message, answer it and log the exchange.

    Args:
        message: text typed by the user.
        history: prior conversation passed in by the chat UI (not used here;
            per-user memory lives in the module-level chat store).
        request: the incoming request, used to derive the client address.

    Returns:
        The assistant's reply text, ``"Invalid request."`` when the
        moderation endpoint flags the input, or a short prompt when the
        message is empty.
    """
    user_input = message
    # Guard against an empty message: without this, no reply would ever be
    # assigned.  Returning early also skips building a Chatbot for nothing.
    if not user_input:
        return "Please enter a message."

    bot = Chatbot(os.getenv("OPENAI_API_KEY"), index=index)
    # get_client_ip stores the address in the module-level client_ip.
    # NOTE(review): that global is shared by all requests, so concurrent
    # requests could overwrite each other's value -- confirm whether the
    # app serves requests in parallel.
    bot.get_client_ip(request)
    # convert client ip to a user id
    # NOTE(review): stripping the dots can map different addresses to the
    # same id (e.g. 1.23.4.5 and 12.3.4.5); left unchanged so ids already
    # stored keep working.
    user_id = client_ip.replace(".", "")

    # use moderations endpoint to check input
    client = openai.OpenAI()
    response_mod = client.moderations.create(input=user_input)
    flagged = response_mod.model_dump()['results'][0]['flagged']

    if flagged:
        output = "Invalid request."
    else:
        response_bot = bot.generate_response(user_input, user_id)
        output = response_bot['content']
        # save chat store for individual user
        bot.save_user_chat_store(user_id)

    bot.append_chat_history(user_input, output)
    bot.save_chat_history()
    return output
'''
inputs = gr.components.Textbox(lines=7, label="Ask the AI chatbot any questions that you have related to the course and the subject content.")
outputs = gr.components.Textbox(label="Response")
gr.Interface(fn=create_bot, inputs=inputs, outputs=outputs, title="AI Assistant",
description="This is a virtual learning assistant designed for MIS course (Beta version 2.0, powered by GPT-4).\nNote: Chatbot can make mistakes. Please double-check important information."
).launch(share=True)
'''
# Build the chat UI around create_bot, then start serving it.
demo = gr.ChatInterface(
    fn=create_bot,
    type="messages",
    chatbot=gr.Chatbot(height=300),
    #textbox=gr.Textbox(container=False, scale=7),
    title="Educational Chatbot",
    description="Ask the AI chatbot any questions related to the course or subject",
    theme="Ocean",
    examples=[
        "Tell me about the course.",
        "how many assignments do I have?",
        "Tell me an IT joke.",
    ],
)
demo.launch(share=True)
|