sm1rk committed
Commit: 2810627
Parent(s): 0266a31

Add index persisting

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+persist/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+__pycache__/*
app.py CHANGED
@@ -1,20 +1,16 @@
 import streamlit as st
-import random
-import time
-from index import build_index, build_service_context
-from loader import load_documents
+from index import build_index, build_service_context, load_documents
 
-st.title("SAIRA")
+st.title("SAIRA: Student Affairs AI Response Assistant")
+st.caption('Welcome to the SAIRA chatbot! This bot has knowledge about Innopolis University. Feel free to write your request!')
 
 @st.cache_resource
 def load_docs_and_build_index():
-    context = build_service_context()
+    service_context = build_service_context()
     docs = load_documents()
-    return build_index(docs, context)
+    return build_index(docs, service_context)
 
-print("Loading documents and building index...")
 index = load_docs_and_build_index()
-print("Done!")
 
 query_engine = index.as_query_engine(streaming=True)
 
@@ -40,13 +36,6 @@ if prompt := st.chat_input("What is up?"):
         resp = query_engine.query(prompt)
         message_placeholder = st.empty()
         full_response = ""
-        assistant_response = random.choice(
-            [
-                "Hello there! How can I assist you today?",
-                "Hi, human! Is there anything I can help you with?",
-                "Do you need help?",
-            ]
-        )
         # Simulate stream of response with milliseconds delay
         for text in resp.response_gen:
             full_response += text
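
The second hunk above ends at the top of the streaming loop. For reference, a minimal sketch of the complete assistant turn it belongs to: only the lines shown in the diff are confirmed by this commit; the st.chat_message wrapper, the typing cursor, and the session_state bookkeeping are assumptions based on the standard Streamlit chat pattern this app follows.

if prompt := st.chat_input("What is up?"):
    # Record and echo the user's message (assumed boilerplate, outside the hunk).
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        resp = query_engine.query(prompt)  # streaming query engine from above
        message_placeholder = st.empty()
        full_response = ""
        for text in resp.response_gen:  # tokens arrive incrementally
            full_response += text
            message_placeholder.markdown(full_response + "▌")  # simulated cursor (assumed)
        message_placeholder.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})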
index.py CHANGED
@@ -1,17 +1,54 @@
-from llama_index import download_loader, VectorStoreIndex, SimpleDirectoryReader, ServiceContext
-from llama_index.tools import QueryEngineTool, ToolMetadata
-from llama_index.query_engine import SubQuestionQueryEngine
-from llama_index.callbacks import CallbackManager, LlamaDebugHandler
-from llama_index.llms import LlamaCPP
-
-from utils import completion_to_prompt, messages_to_prompt
+from llama_index import (
+    VectorStoreIndex,
+    SimpleDirectoryReader,
+    ServiceContext,
+    StorageContext,
+    load_index_from_storage
+)
+from llama_index.vector_stores import SimpleVectorStore
+from llama_index.llms import Ollama, OpenAI
+import os
 
 
+DOCS_DIR = "./raw"
+PERSIST_DIR = './persist'
+
+
+def load_documents():
+    documents = SimpleDirectoryReader(DOCS_DIR).load_data()
+    return documents
+
 def build_service_context():
-    return ServiceContext.from_defaults(embed_model='local')
+    # llm = Ollama(model='mistral')
+    llm = OpenAI(model="gpt-3.5-turbo")
+    return ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-large-en-v1.5")
 
-def build_index(documents, context):
-    index = VectorStoreIndex.from_documents(documents, service_context=context)
-    return index
-
-
+def build_index(documents, service_context):
+    persist_dir = os.path.abspath(PERSIST_DIR)
+
+    if os.path.exists(persist_dir + '/index_store.json'):  # Load
+        print('Loading index...')
+        # Solving an issue with vector store file naming
+        old_name = '/default__vector_store.json'
+        new_name = '/vector_store.json'
+        if os.path.exists(persist_dir + old_name):
+            os.rename(persist_dir + old_name, persist_dir + new_name)
+
+        storage_context = StorageContext.from_defaults(
+            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
+            persist_dir=persist_dir,
+        )
+        index = load_index_from_storage(storage_context, service_context=service_context)
+    else:  # Create
+        print('Creating index...')
+        storage_context = StorageContext.from_defaults(
+            vector_store=SimpleVectorStore(),
+        )
+        index = VectorStoreIndex.from_documents(
+            documents,
+            service_context=service_context,
+            storage_context=storage_context
+        )
+        # storage_context.persist(persist_dir=persist_dir)
+        index.storage_context.persist(persist_dir=persist_dir)
+    return index
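
For reference, a minimal sketch of the persist-and-reload round trip that build_index() implements, assuming the pre-0.10 llama_index API used throughout this commit. Passing only persist_dir to StorageContext.from_defaults lets llama_index resolve its own default store file names, which may make the default__vector_store.json rename workaround above unnecessary.

from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

service_context = ServiceContext.from_defaults(embed_model="local:BAAI/bge-large-en-v1.5")

# First run: embed the documents once and write the docstore, index store,
# vector store and graph store JSON files (the persist/*.json LFS files below).
docs = SimpleDirectoryReader("./raw").load_data()
index = VectorStoreIndex.from_documents(docs, service_context=service_context)
index.storage_context.persist(persist_dir="./persist")

# Later runs: rebuild the index from the persisted JSON instead of re-embedding.
storage_context = StorageContext.from_defaults(persist_dir="./persist")
index = load_index_from_storage(storage_context, service_context=service_context)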
loader.py DELETED
@@ -1,7 +0,0 @@
-from llama_index import SimpleDirectoryReader
-
-
-def load_documents():
-    docs_dir = "./raw"
-    documents = SimpleDirectoryReader(docs_dir).load_data()
-    return documents
persist/docstore.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96af5711ecd41935a76bb4225821f6b22f8f02929ad8d4d03644f70a81027b2b
+size 27827969
persist/graph_store.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e0a77744010862225c69da83c585f4f8a42fd551b044ce530dbb1eb6e16742c
+size 18
persist/index_store.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd39565ddeb6f0a600ce635cfaeed1c5ea8dc3f4ac6680f2fa00da63059e4d3
+size 643183
persist/vector_store.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb84c8a62700ea73d428b0032d8994d0d5479dc9db4fbab6b07e1b0fa440b1e1
+size 178546696