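# Gradio chatbot for 馬鞍山長者地區中心 (Ma On Shan District Elderly Community Centre).
# Retrieves context from a Pinecone index via LlamaIndex and answers with an OpenAI LLM;
# when run locally, traces are sent to Arize Phoenix for debugging.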
import re
from typing import List
import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, StorageContext, ServiceContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole, OpenAI
from llama_index.vector_stores import PineconeVectorStore
from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL
# Optional local observability: launch Arize Phoenix and register it as the
# global LlamaIndex handler so retrievals and LLM calls can be traced.
if LOCAL:
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")
openai.api_key = OPENAI_API_KEY
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-instruct")
service_context = ServiceContext.from_defaults(llm=llm)
# Canned refusal reply (Chinese): "I am designed to answer questions about the
# services of the Ma On Shan District Elderly Community Centre."
DENIED_ANSWER_PROMPT = '我是設計用於回答關於馬鞍山長者地區中心的服務內容'
# Context-template system prompt (Chinese). It instructs the model to answer only
# from the retrieved context, to reply with DENIED_ANSWER_PROMPT when the context
# does not match the question, and to decline arithmetic, translation, code or
# article generation, and any content (e.g. medical advice) not found in the context.
SYSTEM_PROMPT = (
    'Context:'
    "\n--------------------\n"
    "{context_str}"
    "\n--------------------\n"
    "\n"
    "Instruction:"
    '\n- 你必須基於上面提供的資訊 (context) 進行總結,回答用戶的提問。'
    f'\n- 你必須嚴格判斷 context 內容是否完全符合用戶的問題。如不確定,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    '\n- 你不能自行生成非 context 的內容,必須基於 context 原文進行回答。'
    f'\n- 如沒有與問題符合的 context,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,不附加任何資訊或建議。'
    f'\n- 你不能進行算術,翻譯,程式碼生成,文章生成等要求。如你被要求進行算術,翻譯,程式碼生成,文章生成等要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能提供或生成 context 不存在的內容,例如名稱,服務,地點,介紹,健康資訊,醫學建議或者醫療相關的解答。如被要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 如果當前的問題沒有任何符合的 context 可供作答,必須以「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    # f'\n- 提供網址時,盡量以列點顯示。'
)
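# Wire the retrieval pipeline: wrap the already-populated Pinecone index as a
# LlamaIndex vector store (no documents are ingested here), and build a context
# chat engine that retrieves the top-3 most similar chunks and injects them
# into SYSTEM_PROMPT as {context_str}.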
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)
chat_engine = index.as_chat_engine(
    chat_mode=ChatMode.CONTEXT,
    similarity_top_k=3,
    context_template=SYSTEM_PROMPT,
)
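# Suggested starter questions shown in the chat UI (Chinese): self-introduction,
# centre introduction, opening hours, membership application, latest activities.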
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '可以介紹一下中心嗎?',
    '中心的開放時間是?',
    '會員如何申請?',
    '有什麼最新活動?',
]
def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    """Convert the most recent Gradio [user, assistant] pair into LlamaIndex ChatMessages."""
    chat_messages = []
    for conversation in history[-1:]:  # only the latest exchange is kept as chat history
        for i, message in enumerate(conversation):
            if not message:
                continue
            # Strip the "參考" (references) footer appended by predict() before re-sending.
            message = re.sub(r'\n \n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
            role = MessageRole.USER if i % 2 == 0 else MessageRole.ASSISTANT
            chat_message = ChatMessage(role=role, content=message.strip())
            chat_messages.append(chat_message)
    return chat_messages
def predict(message, history):
    """Stream a RAG answer, then append a deduplicated list of source URLs."""
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))

    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    # Collect source URLs from retrieved nodes, keeping only sufficiently similar matches.
    urls = []
    for source in response.source_nodes:
        if source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        partial_message = partial_message + "\n \n\n---\n\n參考: \n"
        for url in list(set(urls)):
            partial_message = partial_message + f"- {url}\n"
        yield partial_message
def predict_without_history(message, history):
    """Answer with RAG but ignore any previous turns."""
    yield from predict(message, [])


def predict_with_rag(message, history):
    return predict(message, history)


# For 'With Prompt Wrapper' - add the system prompt, but do not query Pinecone.
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - no system prompt, no retrieval.
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)
def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    """Call the OpenAI ChatCompletion API directly (no retrieval) and stream the reply."""
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # ChatCompletion requires a chat model; 'gpt-3.5-turbo-instruct' is a completion
    # model and would be rejected by this endpoint.
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=history_openai_format,
        temperature=0.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        if len(chunk['choices'][0]['delta']) != 0:
            partial_message = partial_message + chunk['choices'][0]['delta']['content']
            yield partial_message
def vote(data: gr.LikeData):
    """Surface like/dislike feedback from the chatbot as a Gradio notification."""
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)
# Build the Gradio UI: a single chat interface with example questions and
# like/dislike voting. Locally the app runs unauthenticated; when hosted it is
# protected by basic auth.
chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# 馬鞍山長者地區中心智能助理")
    gr.ChatInterface(
        predict,
        chatbot=chatbot,
        examples=CHAT_EXAMPLES,
    )
    chatbot.like(vote, None, None)

if LOCAL:
    demo.queue()
    demo.launch(share=False)
else:
    demo.launch(share=False, auth=("demo", PASSWORD))