import os

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.prompts import ChatPromptTemplate, ChatMessage, MessageRole
from llama_index.vector_stores import SimpleVectorStore
from llama_index.llms import Ollama, OpenAI


DOCS_DIR = './raw'
PERSIST_DIR = './persist'


def load_documents():
    # Read every file in DOCS_DIR into llama_index Document objects.
    documents = SimpleDirectoryReader(DOCS_DIR).load_data()
    return documents

def build_service_context():
    # Swap in Ollama for a fully local LLM if desired:
    # llm = Ollama(model='mistral')
    llm = OpenAI(model="gpt-4-1106-preview")
    # Embeddings run locally via the BAAI/bge-large-en-v1.5 HuggingFace model.
    return ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-large-en-v1.5")

def build_index(documents, service_context):
    persist_dir = os.path.abspath(PERSIST_DIR)

    if os.path.exists(os.path.join(persist_dir, 'index_store.json')):  # Load existing index
        print('Loading index...')
        # Work around a persisted-filename mismatch: some llama_index versions
        # write the vector store as 'default__vector_store.json', while
        # SimpleVectorStore.from_persist_dir expects 'vector_store.json'.
        old_path = os.path.join(persist_dir, 'default__vector_store.json')
        new_path = os.path.join(persist_dir, 'vector_store.json')
        if os.path.exists(old_path):
            os.rename(old_path, new_path)

        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
            persist_dir=persist_dir,
        )
        index = load_index_from_storage(storage_context, service_context=service_context)
    else:  # Create a new index and persist it
        print('Creating index...')
        storage_context = StorageContext.from_defaults(
            vector_store=SimpleVectorStore(),
        )
        index = VectorStoreIndex.from_documents(
            documents,
            service_context=service_context,
            storage_context=storage_context,
        )
        index.storage_context.persist(persist_dir=persist_dir)
    return index

def change_prompts(query_engine):
    message_templates = [
        ChatMessage(content='''You are SAIRA (Student Affairs AI Response Assistant) - an expert Q&A system for the students of Innopolis University.
Always answer the query using the provided context information, and not prior knowledge.
Never directly reference the given context in your answer. Never use file names or any other meta information in the answer.
If you mention a person or department, also provide their Telegram or e-mail.
If you mention a Telegram chat, give the link to it.''', role=MessageRole.SYSTEM),
        ChatMessage(content='''Context information:
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.

Query: {query_str}

Avoid statements like 'based on the context', 'the context information', 'in the context', or anything along those lines.
Never use the word 'context' in the answer!
If you cannot write an answer, or it is not provided in the context, just write '<SPECIALIST>' as the answer, and the request will be transferred to a specialist.
Write '<SPECIALIST>' instead of asking to contact Student Affairs.''', role=MessageRole.USER),
    ]
    qa_prompt_tmpl = ChatPromptTemplate(message_templates)

    # Override the default text QA template on the response synthesizer.
    query_engine.update_prompts(
        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
    )
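
# --- Usage sketch (illustrative, not part of the original pipeline) ---
# A minimal example of wiring the functions above together, assuming the
# legacy (pre-0.10) llama_index API used in this file, that OPENAI_API_KEY
# is set in the environment, and that DOCS_DIR contains at least one document.
# The sample query string is hypothetical.
if __name__ == '__main__':
    service_context = build_service_context()
    documents = load_documents()
    index = build_index(documents, service_context)

    # as_query_engine() builds a retriever + response synthesizer over the index;
    # change_prompts() then swaps in the SAIRA QA template before querying.
    query_engine = index.as_query_engine()
    change_prompts(query_engine)

    response = query_engine.query('How do I get a transcript?')
    print(response)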