"""Gradio chatbot for the 馬鞍山長者地區中心 (Ma On Shan District Elderly Community Centre).

Answers come from a LlamaIndex context chat engine backed by a Pinecone vector index;
questions that cannot be grounded in retrieved context get a fixed refusal message.
"""
import re
from typing import List

import gradio as gr
import openai
import pinecone
from llama_index import VectorStoreIndex, StorageContext, ServiceContext
from llama_index.chat_engine.types import ChatMode
from llama_index.llms import ChatMessage, MessageRole, OpenAI
from llama_index.vector_stores import PineconeVectorStore

from environments import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX, PASSWORD, LOCAL

if LOCAL:
    # Local runs launch Arize Phoenix and register it as the global handler for tracing.
    import llama_index
    import phoenix as px

    px.launch_app()
    llama_index.set_global_handler("arize_phoenix")

openai.api_key = OPENAI_API_KEY

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter'
)
pinecone_index = pinecone.Index(PINECONE_INDEX)

llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-instruct")
service_context = ServiceContext.from_defaults(llm=llm)

# Fixed refusal message: "I am designed to answer questions about the services of the
# Ma On Shan District Elderly Community Centre."
DENIED_ANSWER_PROMPT = '我是設計用於回答關於馬鞍山長者地區中心的服務內容'

# Context-grounding prompt (in Chinese): answer only from the retrieved context; for anything
# else (no matching context, arithmetic, translation, code/essay generation, medical advice)
# reply with DENIED_ANSWER_PROMPT and nothing more.
SYSTEM_PROMPT = (
    f'Context:'
    "\n--------------------\n"
    "{context_str}"
    "\n--------------------\n"
    "\n"
    "Instruction:"
    f'\n- 你必須基於上面提供的資訊 (context) 進行總結,回答用戶的提問。'
    f'\n- 你必須嚴格判斷 context 內容是否完全符合用戶的問題。如不確定,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能自行生成非 context 的內容,必須基於 context 原文進行回答。'
    f'\n- 如沒有與問題符合的 context,必須以「{DENIED_ANSWER_PROMPT}」為完整回答,不附加任何資訊或建議。'
    f'\n- 你不能進行算術,翻譯,程式碼生成,文章生成等要求。如你被要求進行算術,翻譯,程式碼生成,文章生成等要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 你不能提供或生成 context 不存在的內容,例如名稱,服務,地點,介紹,健康資訊,醫學建議或者醫療相關的解答。如被要求,你必須回答「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    f'\n- 如果當前的問題沒有任何符合的 context 可供作答,必須以「{DENIED_ANSWER_PROMPT}」為完整回覆,不附加任何資訊或建議。'
    # f'\n- 提供網址時,盡量以列點顯示。'
)

# Wrap the existing Pinecone index as a LlamaIndex vector store and build a context chat
# engine that retrieves the top 3 most similar nodes for each question.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents([], storage_context=storage_context, service_context=service_context)

chat_engine = index.as_chat_engine(chat_mode=ChatMode.CONTEXT,
                                   similarity_top_k=3,
                                   context_template=SYSTEM_PROMPT,
                                   )

# Example questions shown under the chat box (self-introduction, centre introduction,
# opening hours, membership application, latest activities).
CHAT_EXAMPLES = [
    '你可以自我介紹嗎?',
    '可以介紹一下中心嗎?',
    '中心的開放時間是?',
    '會員如何申請?',
    '有什麼最新活動?',
]


def convert_to_chat_messages(history: List[List[str]]) -> List[ChatMessage]:
    """Convert the most recent Gradio [user, assistant] pair into LlamaIndex ChatMessages."""
    chat_messages = []
    for conversation in history[-1:]:
        for index, message in enumerate(conversation):
            if not message:
                continue

            # Strip the "參考" (reference links) footer appended by predict() before re-sending.
            message = re.sub(r'\n \n\n---\n\n參考: \n.*$', '', message, flags=re.DOTALL)
            role = MessageRole.USER if index % 2 == 0 else MessageRole.ASSISTANT
            chat_message = ChatMessage(role=role, content=message.strip())
            chat_messages.append(chat_message)

    return chat_messages


def predict(message, history):
    response = chat_engine.stream_chat(message, chat_history=convert_to_chat_messages(history))

    partial_message = ""
    for token in response.response_gen:
        partial_message = partial_message + token
        yield partial_message

    # Collect source URLs from high-similarity nodes only.
    urls = []
    for source in response.source_nodes:
        if source.score < 0.78:
            continue
        url = source.node.metadata.get('source')
        if url:
            urls.append(url)

    if urls:
        # Append a "參考" (references) footer listing the deduplicated source URLs.
        partial_message = partial_message + "\n \n\n---\n\n參考: \n"
        for url in list(set(urls)):
            partial_message = partial_message + f"- {url}\n"
            yield partial_message


def predict_without_history(message, history):
    yield from predict(message, [])


def predict_with_rag(message, history):
    return predict(message, history)


# For 'With Prompt Wrapper' - add the system prompt, but no Pinecone retrieval.
def predict_with_prompt_wrapper(message, history):
    yield from _invoke_chatgpt(history, message, is_include_system_prompt=True)


# For 'Vanilla ChatGPT' - no system prompt.
def predict_vanilla_chatgpt(message, history):
    yield from _invoke_chatgpt(history, message)


def _invoke_chatgpt(history, message, is_include_system_prompt=False):
    history_openai_format = []
    if is_include_system_prompt:
        history_openai_format.append({"role": "system", "content": SYSTEM_PROMPT})

    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',  # the chat completions endpoint does not accept the completions-only "-instruct" model
        messages=history_openai_format,
        temperature=0.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        # The first streamed delta may carry only the role and the last one is empty,
        # so read "content" defensively.
        content = chunk['choices'][0]['delta'].get('content')
        if content:
            partial_message = partial_message + content
            yield partial_message


def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)


chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    # "馬鞍山長者地區中心智能助理" = "Ma On Shan District Elderly Community Centre smart assistant".
    gr.Markdown("# 馬鞍山長者地區中心智能助理")

    gr.ChatInterface(predict,
                     chatbot=chatbot,
                     examples=CHAT_EXAMPLES,
                     )

    chatbot.like(vote, None, None)

if LOCAL:
    demo.queue()
    demo.launch(share=False)
else:
    # Hosted deployments are protected with HTTP basic auth.
    demo.launch(share=False, auth=("demo", PASSWORD))
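
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the `environments` module this script imports
# from. The names match the import at the top of the file; the values below are
# placeholder assumptions, not real credentials or the project's actual settings.
#
#     # environments.py
#     OPENAI_API_KEY = "sk-..."        # OpenAI API key
#     PINECONE_API_KEY = "..."         # Pinecone API key
#     PINECONE_INDEX = "my-index"      # name of an existing Pinecone index (hypothetical)
#     PASSWORD = "..."                 # basic-auth password for the hosted demo
#     LOCAL = True                     # True: local run with Phoenix tracing and no auth
# ---------------------------------------------------------------------------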