# SAIRA / index.py
# Commit: "Custom prompts and gpt-4 model" by batalovme (78af57e)
from llama_index import (
VectorStoreIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
load_index_from_storage
)
from llama_index.prompts import ChatPromptTemplate, ChatMessage, MessageRole
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI
import os
DOCS_DIR = "./raw"
PERSIST_DIR = './persist'
def load_documents():
    """Read every file under DOCS_DIR and return them as llama_index documents."""
    reader = SimpleDirectoryReader(DOCS_DIR)
    return reader.load_data()
def build_service_context():
    """Build a ServiceContext backed by OpenAI GPT-4 and a local BGE embedder."""
    # Local alternative (kept for reference): Ollama(model='mistral')
    chat_llm = OpenAI(model="gpt-4-1106-preview")
    return ServiceContext.from_defaults(
        llm=chat_llm,
        embed_model="local:BAAI/bge-large-en-v1.5",
    )
def build_index(documents, service_context):
    """Load a persisted vector index from PERSIST_DIR, or build one from *documents*.

    If an index store already exists on disk it is loaded (after migrating a
    legacy vector-store filename); otherwise a fresh VectorStoreIndex is built
    from *documents* and persisted to PERSIST_DIR.

    :param documents: documents to index when no persisted index exists
    :param service_context: llama_index ServiceContext (LLM + embedding model)
    :return: a VectorStoreIndex, either loaded or freshly built
    """
    persist_dir = os.path.abspath(PERSIST_DIR)
    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load
        print('Loading index...')
        # Migrate the vector-store file from the old default name so that
        # SimpleVectorStore.from_persist_dir can find it.
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            os.rename(old_path, new_path)
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context
        )
        # Persist via the index's own storage context so all stores are written.
        index.storage_context.persist(persist_dir=persist_dir)
    return index
def change_prompts(query_engine):
    """Install SAIRA's custom system/user chat prompts on *query_engine*.

    Replaces the engine's ``response_synthesizer:text_qa_template`` with a
    ChatPromptTemplate that instructs the model to answer only from retrieved
    context and to emit the '<SPECIALIST>' sentinel when it cannot answer.
    Mutates *query_engine* in place; returns None.

    :param query_engine: a llama_index query engine supporting update_prompts()
    """
    message_templates = [
        ChatMessage(content='''You are SAIRA (Student Affairs AI Response Assistant) - expert Q&A system for the students of Innopolis University.
Always answer the query using the provided context information, and not prior knowledge.
Never directly reference the given context in your answer. Never use file names or any other meta information in the answer.
If you mention person or department, provide also their Telegram or E-mail.
If you mention some Telegram chat, give the link to it''', role=MessageRole.SYSTEM),
        # Fixed typo: "providied" -> "provided" in the user prompt below.
        ChatMessage(content='''Context information:
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Avoid statements like 'based on the context' or 'the context information', 'in the context' or anything along those lines.
Never use word 'context' in the answer!
If you can't write answer, or it is not provided in context, just write '<SPECIALIST>' as an answer, and the request will be transferred to the specialist.
Write '<SPECIALIST>' instead of asking to contact Student Affairs.''', role=MessageRole.USER),
    ]
    qa_prompt_tmpl = ChatPromptTemplate(message_templates)
    query_engine.update_prompts(
        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
    )