import re
from typing import List

import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, StorageContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole
from llama_index.vector_stores import PineconeVectorStore

from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL
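# NOTE: `environments.py` is a local config module that is not shown here. Judging
# from the names imported above, it is assumed to look roughly like this sketch
# (values read from environment variables / Space secrets):
#
#     import os
#
#     OPENAI_API_KEY = os.environ['OPENAI_API_KEY']      # OpenAI key for chat completions
#     PINECONE_API_KEY = os.environ['PINECONE_API_KEY']  # Pinecone key for the vector index
#     PINECONE_INDEX = os.environ['PINECONE_INDEX']      # name of the Pinecone index to query
#     PASSWORD = os.environ.get('PASSWORD', '')          # basic-auth password for the hosted demo
#     LOCAL = os.environ.get('LOCAL', '0') == '1'        # enables local tracing and launch options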
if LOCAL:
    # When running locally, launch the Arize Phoenix tracing UI and register it
    # as LlamaIndex's global callback handler so requests can be inspected.
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")

openai.api_key = OPENAI_API_KEY
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)

# Wrap the existing Pinecone index as a LlamaIndex vector store. No documents are
# ingested here, so the index serves only vectors that were upserted beforehand.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents([], storage_context=storage_context)

# Context chat mode: retrieve the top-2 most similar chunks and inject them into
# the prompt for every user message.
chat_engine = index.as_chat_engine(chat_mode=ChatMode.CONTEXT, similarity_top_k=2)
# Canned refusal (in Chinese): "I am designed to answer questions about the services
# of the 信義會 (Lutheran) district centres" - returned for out-of-scope questions.
DENIED_ANSWER_PROMPT = '我是設計用於回答關於信義會地區中心的服務內容'

# System prompt (in Chinese): answer only by summarising the retrieved context, and
# reply with DENIED_ANSWER_PROMPT whenever the context does not cover the question.
SYSTEM_PROMPT = f'你是信義會地區中心的智能助理,你必須基於提供的資訊進行總結,用以回答用戶的提問。' \
                f'1. 你不能自行生成非 context 已有的內容,必須基於 context 原文進行回答。' \
                f'2. 如無與 prompt 完全符合的 context 內容,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,並拒絕回答任何內容或建議。'

# Disabled alternative prompt rules, kept for reference:
# f'2. 你不能提供context沒有提及的內容,例如服務,地點,健康資訊,醫學建議或者醫療相關的解答。' \
# f'如你被要求解答context沒有提及的地點,健康資訊,醫學建議或者醫療相關的問題,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆。' \
# f'3. 你不能進行算術,翻譯,程式碼生成,文章生成等,與信義會地區中心無關的要求。' \
# f'如你被要求進行算術,翻譯,程式碼生成,文章生成等,與信義會地區中心無關的要求,你可以回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不容許附加資訊。' \
# f'4. 如果當前的 prompt 沒有任何 context 可供參考,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不容許附加資訊。' \
# f'回覆請保持簡短,跟從提供的context, 不可自行添加回答內容。'
# Example questions shown in the Gradio UI (in Cantonese): self-introduction, opening
# hours, and which district centre or service suits a particular elderly user.
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '沙田護老坊的開放時間?',
    '我今年60歲,住秦石邨,日常比較多病痛,有冇中心可以介紹?',
    '我今年60歲,住馬鞍山,想認識下多D老友記,有冇介紹?',
    '本人70歲,需要地區支援服務,應該去邊個中心?',
    '我有一位親人有認知障礙症,可以介紹相關服務嗎?',
    '可以介紹下邊度有樂齡科技教育?'
]
def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    """Build the chat history passed to the chat engine.

    The per-turn history conversion below is currently disabled; only the system
    prompt is forwarded, so each question is answered statelessly.
    """
    chat_messages = []
    # for conversation in history[-1:]:
    #     for index, message in enumerate(conversation):
    #         if not message:
    #             continue
    #
    #         # Strip the "參考" (references) footer appended by predict() before
    #         # feeding the message back as history.
    #         message = re.sub(r'\n \n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
    #         role = MessageRole.USER if index % 2 == 0 else MessageRole.ASSISTANT
    #         chat_message = ChatMessage(role=role, content=message.strip())
    #         chat_messages.append(chat_message)
    chat_messages.append(ChatMessage(role=MessageRole.SYSTEM, content=SYSTEM_PROMPT))
    return chat_messages
def predict(message, history):
    """RAG answer: stream tokens from the chat engine, then append source URLs."""
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))

    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    # Collect source URLs from the retrieved nodes, keeping only reasonably
    # confident matches (similarity score >= 0.78).
    urls = []
    for source in response.source_nodes:
        if source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        partial_message = partial_message + "\n \n\n---\n\n參考: \n"  # "參考:" = "References:"
        for url in list(set(urls)):
            partial_message = partial_message + f"- {url}\n"
        yield partial_message
def predict_with_rag(message, history):
    return predict(message, history)


# For 'With Prompt Wrapper' - add the system prompt, no Pinecone retrieval
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - no system prompt
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)
def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    """Call the OpenAI chat API directly (no retrieval) and stream the reply."""
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # Legacy (pre-1.0) OpenAI SDK streaming interface.
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=1.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        # The first streamed delta may carry only the role, so read 'content' defensively.
        delta = chunk['choices'][0]['delta']
        if delta.get('content'):
            partial_message = partial_message + delta['content']
            yield partial_message
def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)
chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# 地區服務中心智能助理")  # "District service centre smart assistant"
    gr.ChatInterface(predict,
                     chatbot=chatbot,
                     examples=CHAT_EXAMPLES,
                     )
    chatbot.like(vote, None, None)

demo.queue()

if LOCAL:
    demo.launch(share=False)
else:
    # In the hosted Space, protect the demo behind HTTP basic auth.
    demo.launch(share=False, auth=("demo", PASSWORD))