Spaces:

gsvc
/

krish-sri-bot

Sleeping

App Files Files Community

chgsvc2 committed on Sep 12, 2023

Commit

ec97e51

1 Parent(s): a5f357a

Added necessary files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +47 -0
config.py +25 -0
edubot.py +60 -0
faiss1/education1/index.faiss +3 -0
faiss1/education1/index.pkl +3 -0
requirements.txt +9 -0
res/llama-2-7b-chat.ggmlv3.q2_K.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.faiss filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from edubot import EduBotCreator
+from config import *
+import streamlit as st
+from streamlit_chat import message
+@st.cache_resource(show_spinner=True)
+def create_edubot():
+    edubotcreator = EduBotCreator()
+    edubot = edubotcreator.create_edubot()
+    return edubot
+edubot = create_edubot()
+def infer_edubot(prompt):
+    model_out = edubot(prompt)
+    answer = model_out['result']
+    return answer
+def display_conversation(history):
+    for i in range(len(history["assistant"])):
+        message(history["user"][i], is_user=True, key=str(i) + "_user")
+        message(history["assistant"][i],key=str(i))
+def main():
+    st.title("Krish Sir AI Bot 📚🤖")
+    st.subheader("A bot created using Langchain 🦜 to run on cpu making your learning process easier")
+    user_input = st.text_input("Enter your query")
+    if "assistant" not in st.session_state:
+        st.session_state["assistant"] = ["I am ready to help you"]
+    if "user" not in st.session_state:
+        st.session_state["user"] = ["Hey there!"]
+    if st.button("Answer"):
+        answer = infer_edubot({'query': user_input})
+        st.session_state["user"].append(user_input)
+        st.session_state["assistant"].append(answer)
+        if st.session_state["assistant"]:
+            display_conversation(st.session_state)
+if __name__ == "__main__":
+    main()

config.py ADDED Viewed

	@@ -0,0 +1,25 @@

+DATA_DIR_PATH = "data/"
+VECTOR_DB_PATH = "faiss/education"
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 200
+EMBEDDER = "thenlper/gte-large"
+DEVICE = "cpu"
+PROMPT_TEMPLATE = '''
+With the information provided try to answer the question.
+If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
+So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers
+Context: {context}
+Question: {question}
+Do provide only helpful answers
+Helpful answer:
+'''
+INP_VARS = ['context', 'question']
+CHAIN_TYPE = "stuff"
+SEARCH_KWARGS = {'k': 2}
+MODEL_CKPT = "res/llama-2-7b-chat.ggmlv3.q4_1.bin"
+MODEL_TYPE = "llama"
+MAX_NEW_TOKENS = 512
+TEMPERATURE = 0.9

edubot.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from langchain import PromptTemplate
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import CTransformers
+from langchain.chains import RetrievalQA
+from config import *
+class EduBotCreator:
+    def __init__(self):
+        self.prompt_temp = PROMPT_TEMPLATE
+        self.input_variables = INP_VARS
+        self.chain_type = CHAIN_TYPE
+        self.search_kwargs = SEARCH_KWARGS
+        self.embedder = EMBEDDER
+        self.vector_db_path = VECTOR_DB_PATH
+        self.model_ckpt = MODEL_CKPT
+        self.model_type = MODEL_TYPE
+        self.max_new_tokens = MAX_NEW_TOKENS
+        self.temperature = TEMPERATURE
+    def create_custom_prompt(self):
+        custom_prompt_temp = PromptTemplate(template=self.prompt_temp,
+                            input_variables=self.input_variables)
+        return custom_prompt_temp
+    def load_llm(self):
+        llm = CTransformers(
+                model = self.model_ckpt,
+                model_type=self.model_type,
+                max_new_tokens = self.max_new_tokens,
+                temperature = self.temperature
+            )
+        return llm
+    def load_vectordb(self):
+        hfembeddings = HuggingFaceEmbeddings(
+                            model_name=self.embedder,
+                            model_kwargs={'device': 'cpu'}
+                        )
+        vector_db = FAISS.load_local(self.vector_db_path, hfembeddings)
+        return vector_db
+    def create_bot(self, custom_prompt, vectordb, llm):
+        retrieval_qa_chain = RetrievalQA.from_chain_type(
+                                llm=llm,
+                                chain_type=self.chain_type,
+                                retriever=vectordb.as_retriever(search_kwargs=self.search_kwargs),
+                                return_source_documents=True,
+                                chain_type_kwargs={"prompt": custom_prompt}
+                            )
+        return retrieval_qa_chain
+    def create_edubot(self):
+        self.custom_prompt = self.create_custom_prompt()
+        self.vector_db = self.load_vectordb()
+        self.llm = self.load_llm()
+        self.bot = self.create_bot(self.custom_prompt, self.vector_db, self.llm)
+        return self.bot

faiss1/education1/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddced03d12a951b7c75cf54b6b0ea2fde395ee5629b8c2e5d54a62366c3e05c0
+size 1251885

faiss1/education1/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9963acc74fb1fbc70d5955ef11f1cf5aef8b21e28b3643b3f8c6a3b6bc5d85c9
+size 452274

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+pypdf
+langchain
+torch
+accelerate
+bitsandbytes
+transformers
+sentence_transformers
+faiss_cpu
+streamlit
+# Imported by app.py (from streamlit_chat import message).
+streamlit_chat
+# Backend package required by langchain's CTransformers LLM wrapper used in edubot.py.
+ctransformers

res/llama-2-7b-chat.ggmlv3.q2_K.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45833e0b59c8fe80676c664f556031fc411da8856e0716ac7b8ed201b7221c08
+size 2866807424