File size: 6,673 Bytes
7087237
2acc92e
b0f4221
364f62b
 
1e7b001
 
 
 
2acc92e
1e7b001
 
 
 
 
 
 
 
 
8726be7
b6b7f09
1e7b001
179c61c
 
1e7b001
8adbb97
1e7b001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8726be7
9d96069
 
 
 
 
 
1e7b001
 
8726be7
1e7b001
 
 
 
8417e14
1e7b001
 
 
 
9d96069
 
1e7b001
 
 
 
 
 
 
 
 
 
8417e14
9d96069
8417e14
9d96069
1e7b001
8726be7
1e7b001
 
 
 
 
b6b7f09
 
787a7f1
1e7b001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d02cf9a
028cf03
b0f4221
ba8fcd5
 
 
 
 
b0f4221
990f4c6
028cf03
 
990f4c6
 
 
 
9d96069
 
b0f4221
412ad94
990f4c6
028cf03
990f4c6
d02cf9a
 
a8a0d51
d02cf9a
4dbdf82
d02cf9a
 
 
 
 
7087237
d02cf9a
 
 
 
7087237
787a7f1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
import os
import time
from langchain_community.llms import HuggingFaceHub
from langchain_community.retrievers import BM25Retriever
from langchain_huggingface import HuggingFaceEmbeddings # embeding the documents in the vectorstore
from langchain_huggingface import ChatHuggingFace # chat model
from langchain.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.retrievers import EnsembleRetriever
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.messages import HumanMessage,AIMessage
from langchain.tools.retriever import create_retriever_tool
from langchain_groq import ChatGroq
from transformers import pipeline

# Read the Groq API key from the 'gr_tkn' environment variable and expose it
# under the name the langchain_groq client expects (GROQ_API_KEY).
token = os.getenv('gr_tkn')
if token is None:
    # Fail fast with a clear message instead of the opaque
    # "TypeError: str expected, not NoneType" that os.environ[...] = None raises.
    raise RuntimeError("Environment variable 'gr_tkn' must be set to your Groq API key")
os.environ["GROQ_API_KEY"] = token
def build_rag_chain(pdf_path='kuDoc.pdf'):
    """Build the ChatKU retrieval-augmented generation (RAG) chain.

    Loads the Kenyatta University source PDF, indexes it with both a dense
    retriever (Chroma over sentence-transformers embeddings) and a sparse
    keyword retriever (BM25), combines them in an ensemble, and wires a
    history-aware question-rewriting step plus a stuff-documents QA step
    on top of a Groq-hosted Llama model.

    Args:
        pdf_path: Path to the source PDF document. Defaults to 'kuDoc.pdf'
            so existing callers are unaffected.

    Returns:
        A LangChain retrieval chain; invoke it with
        ``{'input': <question>, 'chat_history': [<messages>]}`` (supports
        ``.stream(...)`` for token-by-token answers).
    """
    # --- Document ingestion: load the PDF and split into ~1000-char chunks ---
    pdfloader = PyPDFLoader(pdf_path)
    docs = pdfloader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = splitter.split_documents(docs)

    # --- Dense retrieval: embed chunks into an in-memory Chroma store ---
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    db = Chroma.from_documents(texts, embedding=embeddings)

    vector_retriever = db.as_retriever(search_type='similarity', search_kwargs={'k': 5})
    # --- Sparse retrieval: BM25 keyword matching over the same chunks ---
    keyword_retriever = BM25Retriever.from_documents(documents=texts, k=5)

    # Blend keyword and vector hits with equal weight.
    ensemble_retriever = EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],
        weights=[0.5, 0.5]
    )

    # temperature=0 for deterministic answers; streaming=True so the Gradio
    # front end can render tokens as they arrive.
    llm = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        streaming=True
    )

    # Prompt used by the history-aware retriever to rewrite follow-up
    # questions into standalone ones (and to carry persona/name-memory rules).
    memory_system_prompt = (
        "You are ChatKU, an AI assistant that helps users learn more about Kenyatta University. "
        "Greet the user by saying: 'Hello my name is ChatKU, I can help you to get to know more about Kenyatta University, so how can I help you dear?' "
        "Only greet the user  when the user greets you otherwise answer the question directly if the user has not greeted you"
        "You should help reformulate follow-up questions into standalone questions. "
        "Given the chat history and the latest user message, rewrite the user’s message as a clear, self-contained question that incorporates all relevant context. "
        "Do not invent new information. "
        "If the user introduces themselves (e.g., 'Hello, I am Steve' or 'My name is Joy'), remember their name for the rest of the conversation. "
        "If the user later asks 'What is my name?' or similar, respond using the name they previously provided."
        "And don't keep saying their name while answering questions,you may only say it at the beginning and end of conversation"
        )
    memory_prompt = ChatPromptTemplate.from_messages([
        ('system', memory_system_prompt),
        MessagesPlaceholder('chat_history'),  # allow us to pass a list of messages to the prompt using 'chat_history'
        ('human', '{input}')
    ])

    # Prompt used by the answering step; {context} is filled with the
    # retrieved document chunks by create_stuff_documents_chain.
    system_prompt = (
        "You are ChatKU, an AI assistant that helps users learn more about Kenyatta University."
        "Greet the user by saying: 'Hello my name is ChatKU, I can help you to get to know more about Kenyatta University, so how can I help you dear?' "
        "Only greet the user  when the user greets you otherwise answer the question directly if the user has not greeted you"
        "Remember the user's name when they introduce it (e.g., 'Hello, am Steve')"
        "Give answers like ChatGPT 4, very detailed"
        "And don't keep saying their name while answering questions,you may only say it at the beginning and end of conversation"
        "Use only the information provided in the context below. "
        "think like an agent before answering the question and give the correct answer"
        "Do not make up information or add external knowledge. "
        "If the answer cannot be found in the context, say so clearly. "
        "Keep your answers concise, natural, and friendly. "
        "Feel free to address the user by name if mentioned in the chat history. "
        "Avoid repeating long context word-for-word. "
        "Never start your answer with phrases like 'Based on the provided context'...or 'According to the information i have...' "
        "and don't include them anywhere in your answer"
        "You are free to use emojis"
        "Consider bolded or stylized text in the context as important keywords.\n\n"
        "Context:\n{context}\n\n"
    )

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Rewrite the latest user turn into a standalone query before retrieving.
    history_aware_retriever = create_history_aware_retriever(llm, ensemble_retriever, memory_prompt)

    # Stuff retrieved chunks into the QA prompt and generate the final answer.
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    return rag_chain

# Build the chain once at import time so every request reuses the same index.
rag_chain = build_rag_chain()
# NOTE(review): this module-level list appears unused — chatku_fn shadows it
# with a local of the same name on every call; presumably a leftover. Confirm
# nothing external reads it before removing.
chat_history = []

def chatku_fn(message, history):
    """Generator for gr.ChatInterface: stream a ChatKU answer for *message*.

    *history* arrives as gradio (user, bot) tuple pairs; it is rebuilt into
    LangChain message objects on every call so each session stays independent.
    """
    lc_history = []
    for user_turn, bot_turn in history:
        lc_history.extend((HumanMessage(content=user_turn), AIMessage(content=bot_turn)))

    # Stream the RAG chain and yield the growing answer piece-by-piece.
    answer_so_far = ""
    for chunk in rag_chain.stream({"input": message, "chat_history": lc_history}):
        answer_so_far += chunk.get("answer", "")

        time.sleep(0.06)  # throttle so the token stream is visible in the UI
        yield answer_so_far

  
# --- Gradio UI: header, streaming chat interface, disclaimer footer ---
with gr.Blocks(fill_height=True) as demo:
    # Plain literal: the original used an f-string with no placeholders (F541).
    gr.Markdown(
        "<h2 style='text-align: center;'>What's your agenda of today about KU?</h2>"
    )

    gr.ChatInterface(
        fn=chatku_fn,  # generator fn -> ChatInterface streams each yield
        chatbot=gr.Chatbot(label="💬 ChatKU"),
        autoscroll=True
    )

    gr.Markdown(
        "⚠️ **ChatKU can make mistakes, verify important information.**",
        elem_id="footer"
    )
if __name__ == "__main__":
    # Honour a platform-supplied PORT (container/PaaS hosts); fall back to
    # gradio's conventional 7860, binding all interfaces.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)