import re
from typing import List

import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole, OpenAI
from llama_index.vector_stores import PineconeVectorStore

from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL
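# environments.py is expected to expose the settings imported above. A minimal
# sketch (an assumption -- the real module may source these differently):
#
#     import os
#     OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
#     PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
#     PINECONE_INDEX = os.environ["PINECONE_INDEX"]
#     PASSWORD = os.environ["PASSWORD"]           # basic-auth password for the hosted demo
#     LOCAL = os.environ.get("LOCAL", "") == "1"  # toggles local dev features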

# When running locally, launch Arize Phoenix and trace llama_index calls
# through its global handler (the Phoenix UI serves on http://localhost:6006).
if LOCAL:
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")

openai.api_key = OPENAI_API_KEY

# Pinecone v2 client; 'gcp-starter' is the free-tier starter environment.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)
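# If the index does not exist yet, it can be created once up front. A sketch,
# assuming OpenAI's text-embedding-ada-002 (1536 dimensions) produced the vectors:
#
#     if PINECONE_INDEX not in pinecone.list_indexes():
#         pinecone.create_index(PINECONE_INDEX, dimension=1536, metric="cosine")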

# Deterministic generation. Note: "gpt-3.5-turbo-instruct" is a completions
# model; llama_index's OpenAI wrapper adapts chat calls to it, but a chat
# model such as "gpt-3.5-turbo" would be the more conventional choice here.
llm = OpenAI(temperature=0.0, model="gpt-3.5-turbo-instruct")
service_context = ServiceContext.from_defaults(llm=llm)

# Stock refusal sentence. English: "I am designed to answer questions about
# the services of the Lutheran Church district centres."
DENIED_ANSWER_PROMPT = '我是設計用於回答關於信義會地區中心的服務內容'

# Context template for the RAG chat engine. {context_str} is filled in by
# llama_index with the retrieved passages. The rules (in Chinese) say, in
# brief: answer strictly from the provided context; never generate content
# beyond it; refuse arithmetic, translation, code or essay generation; refuse
# anything absent from the context (services, locations, health or medical
# advice), replying with DENIED_ANSWER_PROMPT alone and nothing else; and
# present URLs as bullet points where possible.
SYSTEM_PROMPT = (
    'Context:'
    "\n--------------------\n"
    "{context_str}"
    "\n--------------------\n"
    "\n"
    "Instruction:"
    '\n- 你必須基於上面提供的資訊 (context) 回答用戶的提問。'
    '\n- 你不能自行生成非 context 的內容,必須基於 context 原文進行回答。'
    f'\n- 如沒有與問題完全符合的 context,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,不附加任何資訊或建議。'
    f'\n- 你不能進行算術,翻譯,程式碼生成,文章生成等要求。如你被要求進行算術,翻譯,程式碼生成,文章生成等要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能提供或生成 context 不存在的內容,例如服務,地點,健康資訊,醫學建議或者醫療相關的解答。如你被要求解答 context 不存在的內容,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 如果當前的問題沒有任何符合的 context 可供作答,你必須以「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    '\n- 提供網址時,盡量以列點顯示。'
)
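# The context chat engine fills the template roughly like this (a sketch of
# llama_index's internal behaviour, not code this app calls directly):
#
#     SYSTEM_PROMPT.format(context_str="\n\n".join(retrieved_chunk_texts))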

# Attach to the existing Pinecone index; no documents are ingested here, as
# the vectors are assumed to have been upserted by a separate indexing job.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)

# CONTEXT mode retrieves the top-3 most similar chunks per user message and
# injects them into SYSTEM_PROMPT's {context_str} slot.
chat_engine = index.as_chat_engine(chat_mode=ChatMode.CONTEXT,
                                   similarity_top_k=3,
                                   context_template=SYSTEM_PROMPT,
                                   )
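# One-off (non-UI) usage sketch:
#
#     response = chat_engine.chat('沙田護老坊的開放時間?')
#     print(response.response)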
# Example prompts shown in the UI (Cantonese). English glosses: introduce
# yourself; elderly services near Aberdeen (香港仔); opening hours of 沙田護老坊;
# centre recommendations for a 60-year-old in Chun Shek Estate (秦石邨) with
# frequent ailments; meeting elderly friends in Ma On Shan (馬鞍山); district
# support for a 70-year-old; dementia-related services; gerontechnology
# education.
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '住香港仔,邊度可以搵長者服務?',
    '沙田護老坊的開放時間?',
    '我今年60歲,住秦石邨,日常比較多病痛,有冇中心可以介紹?',
    '我今年60歲,住馬鞍山,想認識下多D老友記,有冇介紹?',
    '本人70歲,需要地區支援服務,應該去邊個中心?',
    '我有一位親人有認知障礙症,可以介紹相關服務嗎?',
    '可以介紹下邊度有樂齡科技教育?'
]


def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    """Convert Gradio history pairs into llama_index ChatMessages.

    Only the most recent [user, assistant] exchange is kept, and the
    "參考:" (references) footer appended by predict() is stripped so it is
    not replayed to the model.
    """
    chat_messages = []
    for conversation in history[-1:]:
        # Avoid naming the loop variable "index": it would shadow the
        # module-level VectorStoreIndex instance.
        for i, message in enumerate(conversation):
            if not message:
                continue

            message = re.sub(r'\n \n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
            role = MessageRole.USER if i % 2 == 0 else MessageRole.ASSISTANT
            chat_messages.append(ChatMessage(role=role, content=message.strip()))

    return chat_messages
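# Sketch of the expected shapes:
#
#     convert_to_chat_messages([['你好', '你好!有咩可以幫到你?']])
#     # -> [ChatMessage(role=MessageRole.USER, content='你好'),
#     #     ChatMessage(role=MessageRole.ASSISTANT, content='你好!有咩可以幫到你?')]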


def predict(message, history):
    # Stream the RAG answer token by token; each yield replaces the message
    # shown in the Gradio UI with the cumulative text so far.
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))
    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    # Collect source URLs from the retrieved nodes, keeping only confident
    # hits (guard against a missing score to avoid a None comparison).
    urls = []
    for source in response.source_nodes:
        if source.score is None or source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        # Append a "參考:" (references) footer; dict.fromkeys dedupes while
        # preserving retrieval order.
        partial_message = partial_message + "\n&nbsp;\n\n---\n\n參考: \n"
        for url in dict.fromkeys(urls):
            partial_message = partial_message + f"- {url}\n"
        yield partial_message
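# Note: the footer literal above must stay in sync with the regex in
# convert_to_chat_messages(), which strips it before replaying history. The
# regex matches a plain space where "&nbsp;" is written here, apparently
# because Gradio's rendered history contains the space rather than the entity.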


# For 'Without History' - same RAG pipeline, but earlier turns are discarded
def predict_without_history(message, history):
    yield from predict(message, [])


# Kept as an alias; predict() already performs retrieval-augmented generation
def predict_with_rag(message, history):
    return predict(message, history)


# For 'With Prompt Wrapper' - Add system prompt, no Pinecone
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - No system prompt
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)


def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    # Build an OpenAI-format message list from the Gradio history. SYSTEM_PROMPT
    # is used verbatim here, so its {context_str} placeholder is sent unfilled:
    # there is no retrieval on this path.
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # Legacy openai<1.0 SDK. The chat completions endpoint requires a chat
    # model, so "gpt-3.5-turbo" is used (the instruct model is completions-only
    # and would be rejected here).
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=0.0,
        stream=True
    )
    partial_message = ""
    for chunk in response:
        # The first streamed delta may carry only the role, so read the
        # content defensively.
        delta = chunk['choices'][0]['delta']
        if delta.get('content'):
            partial_message = partial_message + delta['content']
            yield partial_message
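# Equivalent sketch for the openai>=1.0 SDK (an assumption -- this app uses
# the legacy client above):
#
#     from openai import OpenAI as OpenAIClient
#     client = OpenAIClient(api_key=OPENAI_API_KEY)
#     stream = client.chat.completions.create(
#         model="gpt-3.5-turbo", messages=history_openai_format,
#         temperature=0.0, stream=True)
#     for chunk in stream:
#         if chunk.choices[0].delta.content:
#             ...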


def vote(data: gr.LikeData):
    # Show a toast acknowledging the thumbs up/down on a response.
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)


# Created up front so the .like() handler can be attached after the tabs.
chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# 地區服務中心智能助理")

    with gr.Tab("OPTION 1: With History"):
        gr.ChatInterface(predict,
                         chatbot=chatbot,
                         examples=CHAT_EXAMPLES,
                         )
    with gr.Tab("OPTION 2: Without History"):
        gr.ChatInterface(predict_without_history, examples=CHAT_EXAMPLES)
    chatbot.like(vote, None, None)

# Streaming generators need the queue, so enable it in both modes; the hosted
# deployment is additionally protected with HTTP basic auth.
demo.queue()
if LOCAL:
    demo.launch(share=False)
else:
    demo.launch(share=False, auth=("demo", PASSWORD))