Spaces:
Sleeping
Sleeping
Add index persisting
Browse files- .gitattributes +1 -0
- .gitignore +1 -0
- app.py +5 -16
- index.py +47 -10
- loader.py +0 -7
- persist/docstore.json +3 -0
- persist/graph_store.json +3 -0
- persist/index_store.json +3 -0
- persist/vector_store.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
persist/* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/*
|
app.py
CHANGED
@@ -1,20 +1,16 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
import time
|
4 |
-
from index import build_index, build_service_context
|
5 |
-
from loader import load_documents
|
6 |
|
7 |
-
st.title("SAIRA")
|
|
|
8 |
|
9 |
@st.cache_resource
|
10 |
def load_docs_and_build_index():
|
11 |
-
|
12 |
docs = load_documents()
|
13 |
-
return build_index(docs,
|
14 |
|
15 |
-
print("Loading documents and building index...")
|
16 |
index = load_docs_and_build_index()
|
17 |
-
print("Done!")
|
18 |
|
19 |
query_engine = index.as_query_engine(streaming=True)
|
20 |
|
@@ -40,13 +36,6 @@ if prompt := st.chat_input("What is up?"):
|
|
40 |
resp = query_engine.query(prompt)
|
41 |
message_placeholder = st.empty()
|
42 |
full_response = ""
|
43 |
-
assistant_response = random.choice(
|
44 |
-
[
|
45 |
-
"Hello there! How can I assist you today?",
|
46 |
-
"Hi, human! Is there anything I can help you with?",
|
47 |
-
"Do you need help?",
|
48 |
-
]
|
49 |
-
)
|
50 |
# Simulate stream of response with milliseconds delay
|
51 |
for text in resp.response_gen:
|
52 |
full_response += text
|
|
|
1 |
import streamlit as st
|
2 |
+
from index import build_index, build_service_context, load_documents
|
|
|
|
|
|
|
3 |
|
4 |
+
# Page header: application title followed by a one-line introduction.
st.title("SAIRA: Student Affairs AI Response Assistant")
# Grammar fix in the user-facing caption: "This bot have" -> "This bot has".
st.caption('Welcome to the SAIRA chatbot! This bot has knowledge about Innopolis University. Feel free to write your request!')
|
6 |
|
7 |
@st.cache_resource
def load_docs_and_build_index():
    """Create the service context, read the documents and return the index.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        Whatever ``build_index`` returns for the loaded documents and the
        freshly built service context.
    """
    # The service context is created before the documents are read, as in
    # the original statement order.
    context = build_service_context()
    # NOTE(review): documents are read here even when build_index ends up
    # loading an already persisted index -- confirm this is intended.
    return build_index(load_documents(), context)
|
12 |
|
|
|
13 |
index = load_docs_and_build_index()
|
|
|
14 |
|
15 |
query_engine = index.as_query_engine(streaming=True)
|
16 |
|
|
|
36 |
resp = query_engine.query(prompt)
|
37 |
message_placeholder = st.empty()
|
38 |
full_response = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Simulate stream of response with milliseconds delay
|
40 |
for text in resp.response_gen:
|
41 |
full_response += text
|
index.py
CHANGED
@@ -1,17 +1,54 @@
|
|
1 |
-
from llama_index import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
from utils import completion_to_prompt, messages_to_prompt
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def build_service_context():
|
11 |
-
|
|
|
|
|
12 |
|
13 |
-
def build_index(documents,
|
14 |
-
|
15 |
-
return index
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llama_index import (
|
2 |
+
VectorStoreIndex,
|
3 |
+
SimpleDirectoryReader,
|
4 |
+
ServiceContext,
|
5 |
+
StorageContext,
|
6 |
+
load_index_from_storage
|
7 |
+
)
|
8 |
+
from llama_index.vector_stores import SimpleVectorStore
|
9 |
+
from llama_index.llms import Ollama, OpenAI
|
10 |
+
import os
|
11 |
|
|
|
12 |
|
13 |
+
DOCS_DIR = "./raw"
|
14 |
+
PERSIST_DIR = './persist'
|
15 |
+
|
16 |
+
|
17 |
+
def load_documents():
    """Read every document found under ``DOCS_DIR``.

    Returns:
        The result of ``SimpleDirectoryReader(DOCS_DIR).load_data()``.
    """
    return SimpleDirectoryReader(DOCS_DIR).load_data()
|
20 |
|
21 |
def build_service_context():
    """Create the llama_index service context used for indexing and queries.

    Returns:
        A ``ServiceContext`` configured with the OpenAI ``gpt-3.5-turbo`` LLM
        and the ``local:BAAI/bge-large-en-v1.5`` embedding model.
    """
    # Alternative LLM left by the author: Ollama(model='mistral').
    settings = {
        "llm": OpenAI(model="gpt-3.5-turbo"),
        "embed_model": "local:BAAI/bge-large-en-v1.5",
    }
    return ServiceContext.from_defaults(**settings)
|
25 |
|
26 |
+
def build_index(documents, service_context):
    """Load the persisted vector index, or build and persist it on first run.

    If ``index_store.json`` exists under ``PERSIST_DIR`` the index is restored
    from that directory and ``documents`` is not used. Otherwise a new
    ``VectorStoreIndex`` is built from ``documents`` and persisted to
    ``PERSIST_DIR``.

    Args:
        documents: Documents to index when no persisted index is found.
        service_context: Service context handed to llama_index both when
            loading and when building the index.

    Returns:
        The loaded or newly created index.
    """
    persist_dir = os.path.abspath(PERSIST_DIR)

    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load
        print('Loading index...')
        # Naming workaround kept from the original: a vector store saved as
        # 'default__vector_store.json' is renamed to 'vector_store.json'
        # before loading (presumably the name from_persist_dir looks for --
        # confirm against the installed llama_index version).
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            # os.replace overwrites an existing target on every platform,
            # whereas os.rename raises on Windows when the target exists.
            os.replace(old_path, new_path)

        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        # Persist through the index's own storage context so the next start
        # takes the load branch above.
        index.storage_context.persist(persist_dir=persist_dir)
    return index
|
loader.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
from llama_index import SimpleDirectoryReader
|
2 |
-
|
3 |
-
|
4 |
-
def load_documents():
|
5 |
-
docs_dir = "./raw"
|
6 |
-
documents = SimpleDirectoryReader(docs_dir).load_data()
|
7 |
-
return documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
persist/docstore.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96af5711ecd41935a76bb4225821f6b22f8f02929ad8d4d03644f70a81027b2b
|
3 |
+
size 27827969
|
persist/graph_store.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e0a77744010862225c69da83c585f4f8a42fd551b044ce530dbb1eb6e16742c
|
3 |
+
size 18
|
persist/index_store.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bd39565ddeb6f0a600ce635cfaeed1c5ea8dc3f4ac6680f2fa00da63059e4d3
|
3 |
+
size 643183
|
persist/vector_store.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb84c8a62700ea73d428b0032d8994d0d5479dc9db4fbab6b07e1b0fa440b1e1
|
3 |
+
size 178546696
|