# gen-ai-demo-3 / app.py
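"""Gradio demo: a retrieval-augmented chatbot for 基督教香港信義會社會服務部
(Evangelical Lutheran Church of Hong Kong, Social Service Department) centre
information, built on LlamaIndex, Pinecone, and OpenAI."""
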
import re
from typing import List
import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, StorageContext, ServiceContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole, OpenAI
from llama_index.vector_stores import PineconeVectorStore
from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL
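
# When running locally, launch Arize Phoenix and register it as the global
# LlamaIndex handler so every query is traced for debugging.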
if LOCAL:
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")

openai.api_key = OPENAI_API_KEY
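
# Connect to the Pinecone 'gcp-starter' environment and open the index that
# holds the embedded knowledge base.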
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)
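
# gpt-3.5-turbo-1106 at low temperature keeps answers close to the retrieved context.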
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-1106")
service_context = ServiceContext.from_defaults(llm=llm)
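
# Canned refusal, returned verbatim for out-of-scope questions. In English:
# "I am designed to answer questions about the services of the Evangelical
# Lutheran Church of Hong Kong, Social Service Department."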
DENIED_ANSWER_PROMPT = '我是設計用於回答關於基督教香港信義會社會服務部的服務內容'
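
# Context-grounded system prompt (instructions in Chinese). It tells the model
# to summarise strictly from the retrieved context, and to reply with
# DENIED_ANSWER_PROMPT (and nothing else) when the context does not match the
# question or when asked for arithmetic, translation, code/essay generation,
# or content absent from the context (names, services, locations, medical advice).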
SYSTEM_PROMPT = (
    'Context:'
    "\n--------------------\n"
    "{context_str}"  # filled in by the chat engine with the retrieved chunks
    "\n--------------------\n"
    "\n"
    "Instruction:"
    f'\n- 你必須基於上面提供的資訊 (context) 進行總結,回答用戶的提問。'
    f'\n- 你必須嚴格判斷 context 內容是否完全符合用戶的問題。如不確定,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能自行生成非 context 的內容,必須基於 context 原文進行回答。'
    f'\n- 如沒有與問題符合的 context,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,不附加任何資訊或建議。'
    f'\n- 你不能進行算術,翻譯,程式碼生成,文章生成等要求。如你被要求進行算術,翻譯,程式碼生成,文章生成等要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能提供或生成 context 不存在的內容,例如名稱,服務,地點,介紹,健康資訊,醫學建議或者醫療相關的解答。如被要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 如果當前的問題沒有任何符合的 context 可供作答,必須以「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    # f'\n- 提供網址時,盡量以列點顯示。'
)
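
# Attach to the existing Pinecone store; from_documents([]) ingests nothing and
# simply builds an index over the vectors already in Pinecone.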
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)
chat_engine = index.as_chat_engine(
    chat_mode=ChatMode.CONTEXT,
    similarity_top_k=3,
    context_template=SYSTEM_PROMPT,
)
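
# Example questions (in Cantonese) shown under the chat box: a self-introduction
# request, opening hours for the Shatin elderly centre, and centre recommendations
# for elderly users, dementia support, and gerontechnology education.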
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '沙田護老坊的開放時間?',
    '我今年60歲,住秦石邨,日常比較多病痛,有冇中心可以介紹?',
    '我今年60歲,住馬鞍山,想認識下多D老友記,有冇介紹?',
    '本人70歲,需要地區支援服務,應該去邊個中心?',
    '我有一位親人有認知障礙症,可以介紹相關服務嗎?',
    '可以介紹下邊度有樂齡科技教育?'
]
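
# Convert Gradio's [[user, assistant], ...] history into LlamaIndex ChatMessages.
# Only the most recent exchange is replayed, and the reference footer appended by
# predict() is stripped so it is not fed back to the model.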
def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    chat_messages = []
    for conversation in history[-1:]:
        for index, message in enumerate(conversation):
            if not message:
                continue
            # Drop the "參考" (references) footer that predict() appends.
            message = re.sub(r'\n&nbsp;\n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
            role = MessageRole.USER if index % 2 == 0 else MessageRole.ASSISTANT
            chat_message = ChatMessage(role=role, content=message.strip())
            chat_messages.append(chat_message)
    return chat_messages
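
# OPTION 1 handler: stream a context-grounded answer token by token, then append
# a de-duplicated list of source URLs whose retrieval score clears the 0.78 threshold.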
def predict(message, history):
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))

    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    urls = []
    for source in response.source_nodes:
        if source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        partial_message = partial_message + "\n&nbsp;\n\n---\n\n參考: \n"
        for url in list(set(urls)):
            partial_message = partial_message + f"- {url}\n"
        yield partial_message
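
# OPTION 2 handler: same RAG pipeline, but ignore the conversation history.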
def predict_without_history(message, history):
    yield from predict(message, [])


def predict_with_rag(message, history):
    return predict(message, history)

# For 'With Prompt Wrapper' - Add system prompt, no Pinecone
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - No system prompt
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)
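
# Shared helper: call the OpenAI chat API directly (no retrieval) and stream
# the reply incrementally.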
def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',  # ChatCompletion requires a chat model; 'gpt-3.5-turbo-instruct' is completions-only
        messages=history_openai_format,
        temperature=0.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        if len(chunk['choices'][0]['delta']) != 0:
            partial_message = partial_message + chunk['choices'][0]['delta']['content']
            yield partial_message
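
# Show a toast acknowledging the user's like/dislike on a response.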
def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)
chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# 地區服務中心智能助理")  # "District Service Centre AI Assistant"
    with gr.Tab("OPTION 1: With History"):
        gr.ChatInterface(predict,
                         chatbot=chatbot,
                         examples=CHAT_EXAMPLES,
                         )
    with gr.Tab("OPTION 2: Without History"):
        gr.ChatInterface(predict_without_history, examples=CHAT_EXAMPLES)

    chatbot.like(vote, None, None)
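
# Locally, run unauthenticated; the hosted demo sits behind basic auth.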
if LOCAL:
    demo.queue()
    demo.launch(share=False)
else:
    demo.launch(share=False, auth=("demo", PASSWORD))