import gradio as gr
import numpy as np
import re
import os
import ast
import requests
import pandas as pd
from typing import Optional, List, Mapping, Any

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.llms.base import LLM

from utils import ClaudeLLM, extract_website_name, remove_numbers

# Embedding model and FAISS index holding the article chunks.
embeddings = HuggingFaceEmbeddings()
db = FAISS.load_local('db_full', embeddings)

# Cache of the most recent successful retrieval.
mp_docs = {}

llm = ClaudeLLM()
# llm = ChatOpenAI(
#     temperature=0,
#     model='gpt-3.5-turbo-16k'
# )


def add_text(history, text):
    print(history)
    history = history + [(text, None)]
    return history, ""


# Available models, each paired with its article budget (0 = no limit).
pipeline = {
    'claude': (ClaudeLLM(), 0),
    'gpt-3.5': (ChatOpenAI(temperature=0, model='gpt-3.5-turbo-16k'), 65),
    'gpt-4': (ChatOpenAI(temperature=0, model='gpt-4'), 30),
}


def retrieve_thoughts(query, n):
    """Retrieve article chunks relevant to `query`, group them back into articles,
    and split them into two relevance tiers by similarity score."""

    # print(db.similarity_search_with_score(query = query, k = k, fetch_k = k*10))
    docs_with_score = db.similarity_search_with_score(
        query=query,
        k=len(db.index_to_docstore_id.values()),
        fetch_k=len(db.index_to_docstore_id.values()),
    )

    # Flatten the (document, score) pairs into a dataframe of metadata, text, and score.
    df = pd.DataFrame([dict(doc[0])['metadata'] for doc in docs_with_score])
    df = pd.concat((df, pd.DataFrame([dict(doc[0])['page_content'] for doc in docs_with_score], columns=['page_content'])), axis=1)
    df = pd.concat((df, pd.DataFrame([doc[1] for doc in docs_with_score], columns=['score'])), axis=1)

    # TO-DO: What if user query doesn't match what we provide as documents
    tier_1 = df[df['score'] < 0.7]
    tier_2 = df[(df['score'] < 0.95) & (df['score'] > 0.7)]

    # Reassemble each article from its chunks, keeping the chunks in order.
    chunks_1 = tier_1.groupby(['title', 'url', '_id']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
    tier_1_adjusted = tier_1.groupby(['title', 'url', '_id']).first().reset_index()[['_id', 'title', 'url']]
    tier_1_adjusted['ref'] = range(1, len(tier_1_adjusted) + 1)
    tier_1_adjusted['content'] = chunks_1

    chunks_2 = tier_2.groupby(['title', 'url', '_id']).apply(lambda x: "\n...\n".join(x.sort_values('id')['page_content'].values)).values
    tier_2_adjusted = tier_2.groupby(['title', 'url', '_id']).first().reset_index()[['_id', 'title', 'url']]
    tier_2_adjusted['content'] = chunks_2

    if n:
        tier_1_adjusted = tier_1_adjusted[:min(len(tier_1_adjusted), n)]

    print(len(tier_1_adjusted))

    # tier_1 = [doc[0] for doc in docs if ((doc[1] < 1))][:5]
    # tier_2 = [doc[0] for doc in docs if ((doc[1] > 0.7)*(doc[1] < 1.5))][10:15]

    # Return the grouped articles. Tier 2 is capped at five articles and uses
    # tier_2_adjusted, which has the 'content' column consumed downstream
    # (the raw tier_2 frame only has 'page_content').
    return {'tier 1': tier_1_adjusted, 'tier 2': tier_2_adjusted[:5]}


def qa_retrieve(query, llm=None):
    """Retrieve relevant articles, synthesize themes with the LLM, and return
    the response together with its references."""

    # The `llm` argument is ignored; the Claude model from `pipeline` is always used.
    llm = pipeline["claude"][0]

    global db
    global mp_docs

    thoughts = retrieve_thoughts(query, 0)
    if not thoughts:
        # Fall back to the last successful retrieval.
        if mp_docs:
            thoughts = mp_docs
    else:
        mp_docs = thoughts

    tier_1 = thoughts['tier 1']
    tier_2 = thoughts['tier 2']

    reference = tier_1[['ref', 'url', 'title']].to_dict('records')

    tier_1 = list(tier_1.apply(lambda x: f"[{int(x['ref'])}] title: {x['title']}\n Content: {x.content}", axis=1).values)
    tier_2 = list(tier_2.apply(lambda x: f"title: {x['title']}\n Content: {x.content}", axis=1).values)

    print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER 2: {tier_2}")
    # print(f"DOCS RETRIEVED: {mp_docs.values}")

    # Cynthesis generation
    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges.
    You will analyze the articles provided to you in different ways. Stay truthful and, if you weren't provided any resources, give your opinion only."""

    task = """Your primary responsibility is to identify multiple themes from the given articles.
    For each theme detected, you are to present it under three separate categories:
    1. Theme Title - An easy-to-understand title that encapsulates the core idea of the theme extracted from the article.
    2. Theme Description - An expanded elaboration that explores the theme in detail based on the arguments and points provided in the article.
    3. Quotes related to theme - Locate and provide at least one compelling quote from the article that directly supports or showcases the theme you have identified. This quote should serve as specific evidence or an example from the article text that corresponds directly to the developed theme.

    The extracted themes should be written in a structured manner, ensuring clarity and a meaningful correlation between the themes and the articles. Make sure your analysis is rooted in the arguments given in the article. Avoid including personal opinions or making generalizations that are not explicitly supported by the articles.
    """

    prompt = PromptTemplate(
        input_variables=["query", "task", "session_prompt", "articles"],
        template="""
        You are a {session_prompt}
        {task}

        query: {query}

        Articles:
        {articles}

        The extracted themes should be written in a structured manner, ensuring clarity and a meaningful correlation between the themes and the articles. Make sure your analysis is rooted in the arguments given in the article. Avoid including personal opinions or making generalizations that are not explicitly supported by the articles.
        """,
    )

    # llm = BardLLM()
    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(query=query, articles="\n".join(tier_1), session_prompt=session_prompt, task=task)

    # No-op pass over the numbered reference markers [0]-[4]; kept as a hook for
    # reformatting citations later.
    for i in range(5):
        response = response.replace(f'[{i}]', f"[{i}]")

    # Generate related questions from the tier-2 articles.
    prompt_q = PromptTemplate(
        input_variables=["session_prompt", "articles"],
        template="""
        You are a {session_prompt}
        Give general/global questions related to the following articles:

        Articles:
        {articles}

        Make sure not to ask specific questions; keep them general, short and concise.
        """,
    )

    chain_q = LLMChain(llm=ClaudeLLM(), prompt=prompt_q)
    questions = chain_q.run(session_prompt=session_prompt, articles="\n".join(tier_2))
    print(questions)

    # Keep everything from the first numbered item onward, then strip the numbering.
    questions = questions[questions.index('1'):]
    questions = [remove_numbers(t).strip() for (i, t) in enumerate(questions.split('.')) if len(t) > 5][:5]
    print(questions)

    # TO-DO: initiate models in another function, refactor code to be reusable
    # json_resp = {'cynthesis': response, 'questions': questions, 'Reference': reference}

    return response, {'Reference': reference}


def flush():
    return None


examples = [
    ["Will Russia win the war in Ukraine?"],
]

demo = gr.Interface(
    fn=qa_retrieve,
    title="cicero-qa-api",
    inputs=gr.inputs.Textbox(lines=5, label="what would you like to learn about?"),
    outputs=[
        gr.components.Textbox(lines=3, label="Themes"),
        gr.components.JSON(label="Reference"),
    ],
    examples=examples,
)

demo.queue(concurrency_count=4)
demo.launch()