SAIRA / index.py
batalovme's picture
Custom prompts and gpt-4 model
78af57e
raw
history blame contribute delete
No virus
3.24 kB
from llama_index import (
VectorStoreIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
load_index_from_storage
)
from llama_index.prompts import ChatPromptTemplate, ChatMessage, MessageRole
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os
# Directory that SimpleDirectoryReader scans for source documents.
DOCS_DIR = './raw'
# Directory where the built index is persisted and later reloaded from.
PERSIST_DIR = "./persist"
def load_documents():
    """Load the documents that SimpleDirectoryReader finds in ``DOCS_DIR``.

    Returns:
        The result of ``SimpleDirectoryReader(DOCS_DIR).load_data()``.
    """
    reader = SimpleDirectoryReader(DOCS_DIR)
    return reader.load_data()
def build_service_context():
    """Build the ServiceContext used for indexing and querying.

    Returns:
        A ``ServiceContext`` created via ``ServiceContext.from_defaults`` with
        the OpenAI ``gpt-4-1106-preview`` LLM and the
        ``local:BAAI/bge-large-en-v1.5`` embedding model spec.
    """
    # An Ollama-served 'mistral' model was previously tried here instead of OpenAI.
    return ServiceContext.from_defaults(
        llm=OpenAI(model="gpt-4-1106-preview"),
        embed_model="local:BAAI/bge-large-en-v1.5",
    )
def build_index(documents, service_context):
    """Load the persisted vector index, or build and persist a new one.

    When ``index_store.json`` exists under ``PERSIST_DIR`` the index is loaded
    from that directory; otherwise a new index is built from ``documents`` and
    written there.

    Args:
        documents: Documents to index. Only used when no persisted index
            is found.
        service_context: Service context forwarded to the index loader or
            builder.

    Returns:
        The loaded or newly created index.
    """
    persist_dir = os.path.abspath(PERSIST_DIR)

    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load
        print('Loading index...')
        # Work around a file-naming mismatch: a persisted
        # "default__vector_store.json" is renamed to "vector_store.json"
        # before loading (presumably the name from_persist_dir looks for --
        # confirm against the installed llama_index version).
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            # os.replace overwrites an existing target on every platform;
            # os.rename raises on Windows when the target already exists.
            os.replace(old_path, new_path)
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        # NOTE(review): persisting only after a fresh build; a loaded index is
        # already on disk. Confirm this matches the intended flow.
        index.storage_context.persist(persist_dir=persist_dir)

    return index
def change_prompts(query_engine):
    """Install the SAIRA chat prompt as the query engine's text QA template.

    Builds a two-message chat template (system instructions plus a user
    message carrying ``{context_str}`` and ``{query_str}``) and registers it
    under the ``response_synthesizer:text_qa_template`` prompt key.

    Args:
        query_engine: Object exposing ``update_prompts``; it is updated in
            place.
    """
    system_text = '''You are SAIRA (Student Affairs AI Response Assistant) - expert Q&A system for the students of Innopolis University.
Always answer the query using the provided context information, and not prior knowledge.
Never directly reference the given context in your answer. Never use file names or any other meta information in the answer.
If you mention person or department, provide also their Telegram or E-mail.
If you mention some Telegram chat, give the link to it'''

    # The model is told to emit the '<SPECIALIST>' marker when it cannot answer.
    user_text = '''Context information:
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Avoid statements like 'based on the context' or 'the context information', 'in the context' or anything along those lines.
Never use word 'context' in the answer!
If you can't write answer, or it is not providied in context, just write '<SPECIALIST>' as an answer, and the request will be transferred to the specialist.
Write '<SPECIALIST>' instead of asking to contact Student Affairs.'''

    system_message = ChatMessage(content=system_text, role=MessageRole.SYSTEM)
    user_message = ChatMessage(content=user_text, role=MessageRole.USER)
    chat_template = ChatPromptTemplate([system_message, user_message])

    prompt_overrides = {"response_synthesizer:text_qa_template": chat_template}
    query_engine.update_prompts(prompt_overrides)