"""Gradio chatbot for the 馬鞍山長者地區中心 (Ma On Shan District Elderly Community Centre).

Answers come from a LlamaIndex context chat engine backed by a Pinecone vector index;
questions that cannot be grounded in retrieved context get a fixed refusal message.
"""
import re
from typing import List

import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, StorageContext, ServiceContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole, OpenAI
from llama_index.vector_stores import PineconeVectorStore

from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL

if LOCAL:
    # Local runs launch Arize Phoenix and register it as the global handler for tracing.
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")

openai.api_key = OPENAI_API_KEY

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)

llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-instruct")
service_context = ServiceContext.from_defaults(llm=llm)

# Fixed refusal message: "I am designed to answer questions about the services of the
# Ma On Shan District Elderly Community Centre."
DENIED_ANSWER_PROMPT = '我是設計用於回答關於馬鞍山長者地區中心的服務內容'

# Context-grounding prompt (in Chinese): answer only from the retrieved context; for anything
# else (no matching context, arithmetic, translation, code/essay generation, medical advice)
# reply with DENIED_ANSWER_PROMPT and nothing more.
SYSTEM_PROMPT = (
    f'Context:'
    "\n--------------------\n"
    "{context_str}"
    "\n--------------------\n"
    "\n"
    "Instruction:"
    f'\n- 你必須基於上面提供的資訊 (context) 進行總結,回答用戶的提問。'
    f'\n- 你必須嚴格判斷 context 內容是否完全符合用戶的問題。如不確定,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能自行生成非 context 的內容,必須基於 context 原文進行回答。'
    f'\n- 如沒有與問題符合的 context,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,不附加任何資訊或建議。'
    f'\n- 你不能進行算術,翻譯,程式碼生成,文章生成等要求。如你被要求進行算術,翻譯,程式碼生成,文章生成等要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能提供或生成 context 不存在的內容,例如名稱,服務,地點,介紹,健康資訊,醫學建議或者醫療相關的解答。如被要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 如果當前的問題沒有任何符合的 context 可供作答,必須以「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    # f'\n- 提供網址時,盡量以列點顯示。'
)

# Wrap the existing Pinecone index as a LlamaIndex vector store and build a context chat
# engine that retrieves the top 3 most similar nodes for each question.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)

chat_engine = index.as_chat_engine(chat_mode=ChatMode.CONTEXT,
                                   similarity_top_k=3,
                                   context_template=SYSTEM_PROMPT,
                                   )

# Example questions shown under the chat box (self-introduction, centre introduction,
# opening hours, membership application, latest activities).
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '可以介紹一下中心嗎?',
    '中心的開放時間是?',
    '會員如何申請?',
    '有什麼最新活動?',
]


def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    """Convert the most recent Gradio [user, assistant] pair into LlamaIndex ChatMessages."""
    chat_messages = []
    for conversation in history[-1:]:
        for index, message in enumerate(conversation):
            if not message:
                continue

            # Strip the "參考" (reference links) footer appended by predict() before re-sending.
            message = re.sub(r'\n \n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
            role = MessageRole.USER if index % 2 == 0 else MessageRole.ASSISTANT
            chat_message = ChatMessage(role=role, content=message.strip())
            chat_messages.append(chat_message)

    return chat_messages


def predict(message, history):
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))

    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    # Collect source URLs from high-similarity nodes only.
    urls = []
    for source in response.source_nodes:
        if source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        # Append a "參考" (references) footer listing the deduplicated source URLs.
        partial_message = partial_message + "\n \n\n---\n\n參考: \n"
        for url in list(set(urls)):
            partial_message = partial_message + f"- {url}\n"
            yield partial_message


def predict_without_history(message, history):
    yield from predict(message, [])


def predict_with_rag(message, history):
    return predict(message, history)


# For 'With Prompt Wrapper' - add the system prompt, but no Pinecone retrieval.
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - no system prompt.
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)


def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})

    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',  # the chat completions endpoint does not accept the completions-only "-instruct" model
        messages=history_openai_format,
        temperature=0.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        # The first streamed delta may carry only the role and the last one is empty,
        # so read "content" defensively.
        content = chunk['choices'][0]['delta'].get('content')
        if content:
            partial_message = partial_message + content
            yield partial_message


def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)


chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    # "馬鞍山長者地區中心智能助理" = "Ma On Shan District Elderly Community Centre smart assistant".
    gr.Markdown("# 馬鞍山長者地區中心智能助理")

    gr.ChatInterface(predict,
                     chatbot=chatbot,
                     examples=CHAT_EXAMPLES,
                     )

    chatbot.like(vote, None, None)

if LOCAL:
    demo.queue()
    demo.launch(share=False)
else:
    # Hosted deployments are protected with HTTP basic auth.
    demo.launch(share=False, auth=("demo", PASSWORD))
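
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the `environments` module this script imports
# from. The names match the import at the top of the file; the values below are
# placeholder assumptions, not real credentials or the project's actual settings.
#
#     # environments.py
#     OPENAI_API_KEY = "sk-..."        # OpenAI API key
#     PINECONE_API_KEY = "..."         # Pinecone API key
#     PINECONE_INDEX = "my-index"      # name of an existing Pinecone index (hypothetical)
#     PASSWORD = "..."                 # basic-auth password for the hosted demo
#     LOCAL = True                     # True: local run with Phoenix tracing and no auth
# ---------------------------------------------------------------------------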