from langchain.agents.initialize import initialize_agent
from langchain.agents.tools import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.llms.openai import OpenAI
import re
import gradio as gr
import openai

def cut_dialogue_history(history_memory, keep_last_n_words=400):
    """Drop the oldest lines of a dialogue history until it is short enough.

    Words are whitespace-separated tokens. If the history holds fewer than
    ``keep_last_n_words`` words it is returned unchanged. Otherwise whole
    lines (split on ``'\\n'``) are removed from the front until the remaining
    word count falls below ``keep_last_n_words``.

    Args:
        history_memory: The dialogue history as a single string, or ``None``.
        keep_last_n_words: Word budget; trimming stops once fewer than this
            many words remain.

    Returns:
        ``history_memory`` itself when it is ``None``, empty, or already under
        the budget; otherwise the surviving lines joined by ``'\\n'`` and
        prefixed with a single ``'\\n'``.
    """
    if not history_memory:
        return history_memory
    n_tokens = len(history_memory.split())
    print(f"history_memory:{history_memory}, n_tokens: {n_tokens}")
    if n_tokens < keep_last_n_words:
        return history_memory
    paragraphs = history_memory.split('\n')
    remaining = n_tokens
    first_kept = 0
    # Count each dropped line with the same whitespace tokenisation that
    # produced n_tokens, so blank lines cost nothing and tab-separated words
    # are not under-counted. The index bound keeps the loop from running past
    # the last line (e.g. when keep_last_n_words <= 0).
    while remaining >= keep_last_n_words and first_kept < len(paragraphs):
        remaining -= len(paragraphs[first_kept].split())
        first_kept += 1
    return '\n' + '\n'.join(paragraphs[first_kept:])


class ConversationBot:
    """Conversational agent that chats about a video using its text descriptions.

    Call order matters: ``init_agent`` builds the LangChain agent for one
    video, and only afterwards can ``run_text`` exchange messages with it.

    Attributes:
        memory: ``ConversationBufferMemory`` handed to the agent; cleared
            every time ``init_agent`` runs.
        tools: Tool list passed to ``initialize_agent`` (empty here).
        llm: ``OpenAI`` LLM wrapper, or ``None`` until ``init_agent`` runs.
        agent: Object returned by ``initialize_agent``, or ``None`` until
            ``init_agent`` runs.
    """

    def __init__(self):
        self.memory = ConversationBufferMemory(memory_key="chat_history", output_key='output')
        self.tools = []
        # Created by init_agent(); declared here so a premature run_text()
        # can be detected and reported clearly.
        self.llm = None
        self.agent = None

    @staticmethod
    def _escape_braces(value):
        """Return ``str(value)`` with ``{`` and ``}`` doubled for use in a format template."""
        return str(value).replace("{", "{{").replace("}", "}}")

    @staticmethod
    def _build_suffix(image_caption, dense_caption, video_caption, tags):
        """Build the prompt suffix template for one video.

        The video descriptions are baked into the text, while
        ``{chat_history}``, ``{input}`` and ``{agent_scratchpad}`` are left
        as placeholders for later formatting. Braces inside the descriptions
        are escaped so they cannot be mistaken for placeholders.
        """
        escape = ConversationBot._escape_braces
        tags = escape(tags)
        image_caption = escape(image_caption)
        dense_caption = escape(dense_caption)
        video_caption = escape(video_caption)
        # The first literal is an f-string (descriptions are substituted now);
        # the second is a plain string so its placeholders survive untouched.
        return f"""You are a chatbot that conducts conversations based on video descriptions. You mainly answer based on the given video description, and you can also modify the content according to the tag information, and you can also answer the relevant knowledge of the person or object contained in the video. But you don't create a video plot out of nothing.

                Begin!

                Video tags are: {tags}

                The frame description of the video is: {image_caption}

                The dense caption of the video is: {dense_caption}

                The general description of the video is: {video_caption}

                """ + """Previous conversation history {chat_history}

                New input: {input}

                {agent_scratchpad}
                """

    def run_text(self, text, state):
        """Send one user message to the agent and append the exchange to ``state``.

        Args:
            text: The user's message; surrounding whitespace is stripped
                before it is passed to the agent.
            state: List of ``(user_text, bot_response)`` tuples so far. It is
                not mutated; a new list is built.

        Returns:
            The updated history twice, as ``(state, state)``. Presumably one
            copy feeds the chat display and one the session state -- confirm
            against the UI wiring.

        Raises:
            RuntimeError: If ``init_agent`` has not been called yet.
        """
        if self.agent is None:
            raise RuntimeError("init_agent() must be called before run_text().")
        # Trim the oldest lines of the stored conversation before the next call.
        self.agent.memory.buffer = cut_dialogue_history(self.agent.memory.buffer, keep_last_n_words=500)
        res = self.agent({"input": text.strip()})
        # Backslashes in the answer are turned into forward slashes.
        response = res['output'].replace("\\", "/")
        state = state + [(text, response)]
        print(f"\nProcessed run_text, Input text: {text}\nCurrent state: {state}\n"
              f"Current Memory: {self.agent.memory.buffer}")
        return state, state

    def init_agent(self, openai_api_key, image_caption, dense_caption, video_caption, tags, state):
        """Build a fresh agent whose prompt embeds the given video descriptions.

        Clears the conversation memory, creates the ``OpenAI`` LLM and the
        ``conversational-react-description`` agent, and appends a fixed
        opening exchange to ``state``.

        Args:
            openai_api_key: API key passed to ``OpenAI``.
            image_caption: Frame description inserted into the prompt.
            dense_caption: Dense caption inserted into the prompt.
            video_caption: General video description inserted into the prompt.
            tags: Video tags inserted into the prompt.
            state: List of ``(user_text, bot_response)`` tuples so far. It is
                not mutated; a new list is built.

        Returns:
            ``(gr.update(visible=True), state, state)``. Which component the
            update targets is decided by the caller's Gradio wiring.
        """
        prefix = "ChatVideo is a chatbot that chats with you based on video descriptions."
        format_instructions = """
        When you have a response to say to the Human,  you MUST use the format:
        ```
        {ai_prefix}: [your response here]
        ```
        """
        suffix = self._build_suffix(image_caption, dense_caption, video_caption, tags)
        # Each new video starts with an empty conversation.
        self.memory.clear()

        self.llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
        self.agent = initialize_agent(
            self.tools,
            self.llm,
            agent="conversational-react-description",
            verbose=True,
            memory=self.memory,
            return_intermediate_steps=True,
            agent_kwargs={'prefix': prefix, 'format_instructions': format_instructions, 'suffix': suffix}, )
        state = state + [("I upload a video, Please watch it first! ","I have watch this video, Let's chat!")]
        return gr.update(visible = True),state, state

if __name__ == "__main__":
    # Running this module directly only opens an interactive pdb session;
    # no other script entry point is defined here.
    import pdb

    pdb.set_trace()