# Imports

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.document_loaders import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain import PromptTemplate
from langchain_openai import OpenAI
from time import time
import gradio as gr
import pandas as pd
import numpy as np
import getpass
import pickle
import time
import os

# Fail fast with an actionable message when the OpenAI key is missing.
# ChatOpenAI(...) and OpenAI() are constructed further down without an
# explicit key, so the variable has to be present in the environment.
if 'OPENAI_API_KEY' not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY environment variable must be set before starting the app"
    )

# Used as the password of the Gradio login pair passed to interface.launch();
# None when HF_TOKEN is not set.
read_key = os.environ.get('HF_TOKEN')

# Embedding model handed to FAISS.load_local for both indexes.
# embedding = GPT4AllEmbeddings()
embedding = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")

def get_openai_embedding(text, model="text-embedding-3-small"):
    """Return the OpenAI embedding vector for ``text``.

    Args:
        text: The string to embed.
        model: Name of the OpenAI embedding model to use. Defaults to the
            model this function was originally hard-coded to.

    Returns:
        The embedding of the first (and only) input, as returned by the API.
    """
    # Imported locally: this file never imports the ``openai`` module itself
    # (only ``from openai import OpenAI``), so the previous reference to
    # ``openai.Embedding`` raised NameError. The v1 client is used instead of
    # the legacy ``openai.Embedding.create`` call.
    from openai import OpenAI as _OpenAIClient

    response = _OpenAIClient().embeddings.create(input=text, model=model)
    return response.data[0].embedding


# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading of the stored index; only point these paths at trusted files.

# ---- CSV index -------------------------------------------------------------
# Folder the CSV index is loaded from. The commented-out call shows how it
# would be written back:
#   vectordb_faiss_csv.save_local(db_path_csv)
db_path_csv = "./data/penn_course_csv"
vectorstore_faiss_csv = FAISS.load_local(
    db_path_csv,
    embedding,
    allow_dangerous_deserialization=True,
)
# Alternative kept for reference (OpenAI embeddings instead of GPT4All):
#   FAISS.load_local(db_path_csv, embedding_function=get_openai_embedding, allow_dangerous_deserialization=True)

# ---- WEB index -------------------------------------------------------------
# Folder the web index is loaded from. The commented-out call shows how it
# would be written back:
#   vectordb_faiss_web.save_local(db_path_web)
db_path_web = "./data/penn_curriculum_web"
vectorstore_faiss_web = FAISS.load_local(
    db_path_web,
    embedding,
    allow_dangerous_deserialization=True,
)
# Alternative kept for reference (OpenAI embeddings instead of GPT4All):
#   FAISS.load_local(db_path_web, embedding_function=get_openai_embedding, allow_dangerous_deserialization=True)

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

# Chat model shared by both multi-query retrievers and by the QA chain.
# Alternative model kept for reference:
#   llm = ChatOpenAI(model_name="gpt-4-turbo", temperature=0)
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0)

# One multi-query retriever per index. Both wrap an MMR retriever over the
# corresponding FAISS store; they differ only in k (35 for CSV, 2 for web).
_mmr_retriever_csv = vectorstore_faiss_csv.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 35},
)
retriever_csv = MultiQueryRetriever.from_llm(retriever=_mmr_retriever_csv, llm=llm)

_mmr_retriever_web = vectorstore_faiss_web.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 2},
)
retriever_web = MultiQueryRetriever.from_llm(retriever=_mmr_retriever_web, llm=llm)

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from openai import OpenAI
# Raw OpenAI SDK client; summarize_response uses it for the summary call.
client = OpenAI()

# Prompt for a single answering pass: the retrieved documents are injected as
# {context} and the user's question as {query}.
qa_prompt = PromptTemplate(
    # Must list exactly the placeholders used in the template below. This
    # previously declared 'contexts', which matches neither the {context}
    # placeholder nor the "context" key supplied when the chain is invoked.
    input_variables=['query', 'context'],
    template = """
    You are a course recommendation system that analyze user's interest 
    and query the vector database to create a personalized course recommendation 
    for the user. Answer the question based on the context below where the context is
    the most similar result of the courses that matches the user query. Generate as detailed
    and accurate response as possible and do not limite the number of responses. If the
    question cannot be answered using the information provided answer with 'I don't know'
    Make sure to include course code, title, description and reasoning for recommending the course in the answer. 

    Context: {context}

    Question: {query},
    """,
)

# Chain that formats qa_prompt and sends it to the shared chat model.
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

def summarize_response(strings):
    """Condense several generated answers into one summary.

    Args:
        strings: Iterable of answer strings. They are joined with newlines
            and sent as the user message of a chat completion.

    Returns:
        The message content of the first choice in the completion.
    """
    # Instruction given to the model as the system message.
    system_prompt = """
             Please summarize the following input into a detailed statement that 
             captures the most important information. Make sure to extract the all the 
             course code and course title as well as their prerequisite information as well as the reasoning for suggesting
             the courses that were recommended from the given string. Do not consider 'I don't know' response but
             make sure to include as many responses as possible in the summary. Make sure the tone of the summary is as a school councelor but
             do not include phrase such as 'as a school councelor' or 'as a councelor'. Provide the summary in extensive detail and organized.
             """
    chat_messages = [
        {"role":"system", "content":system_prompt},
        {"role":"user","content":'\n'.join(strings)},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        # model="gpt-4-turbo",
        messages=chat_messages,
    )
    return completion.choices[0].message.content


def query_retriever_mod_question_seperate(question, n=5):
    """Answer ``question`` in ``n`` independent passes and summarize the answers.

    Each pass fetches documents from ``retriever_csv`` and ``retriever_web``,
    joins their page contents into one context string, runs ``qa_chain`` on
    it and stores the generated text. After the last pass all stored answers
    are condensed with ``summarize_response``.

    Args:
        question: The user's query; used unchanged for retrieval and for the
            QA prompt.
        n: Number of passes to run. With ``n <= 0`` nothing is retrieved or
            generated and an empty result is returned.

    Returns:
        A tuple ``(summary, retrieved_answers, docs_csv, docs_web)``:
            summary: Summary text ("" when no pass was run).
            retrieved_answers: List with the answer text of every pass.
            docs_csv: Documents of the final pass. Note that the web
                documents are appended to this list in place, so it holds
                the CSV results followed by the web results.
            docs_web: Web documents of the final pass.
    """
    retrieved_answers = []
    # Bound up front so the return value is well-defined even if the loop
    # body never executes (previously an UnboundLocalError for n <= 0).
    docs_csv, docs_web = [], []

    for iteration in range(1, n + 1):
        print(f"{iteration} iteration")
        # NOTE: query rewriting (gpt_improved_query) is currently disabled;
        # the question is used as-is.
        start = time.time()

        # Retrieve from the CSV index first, then from the web index.
        docs_csv = retriever_csv.get_relevant_documents(query=question)
        docs_web = retriever_web.get_relevant_documents(query=question)

        # Both result sets feed a single context string for the QA prompt.
        docs_csv.extend(docs_web)
        out = qa_chain.invoke(
            input={
                "query": question,
                "context": "\n---\n".join([d.page_content for d in docs_csv]),
            }
        )

        retrieved_answers.append(out["text"])
        print("completed", "time:", time.time() - start, "sec")

    # Nothing to summarize: skip the extra model call.
    if not retrieved_answers:
        return "", retrieved_answers, docs_csv, docs_web

    summary = summarize_response(retrieved_answers)

    return summary, retrieved_answers, docs_csv, docs_web

def call_rag(question):
    """Gradio callback: run the RAG pipeline and return only the summary text.

    Args:
        question: The text entered by the user.

    Returns:
        The summary produced by ``query_retriever_mod_question_seperate``.
    """
    # Local names chosen so the builtins ``sum`` and ``list`` are not shadowed.
    summary, _answers, _docs_csv, _docs_web = query_retriever_mod_question_seperate(question)

    return summary

# Text-in / text-out Gradio UI around call_rag.
interface = gr.Interface(
  fn=call_rag,
  inputs="text",
  outputs="text",
  title="RAG Demo System - Penn Course Recommendation",
  description="""
  Try input below example prompts in the model!
  
  Example prompt:
  \n
  1. I want to major in Design. Can you provide all the required courses for the major?
  
  2. I want to major in Computer Science. Can you provide all the required courses for the major?
  """,
)

# read_key comes from the HF_TOKEN environment variable and is None when that
# variable is unset. Launching with a missing/empty password would expose a
# login that cannot work as intended, so stop with a clear message instead.
if not read_key:
    raise RuntimeError(
        "HF_TOKEN environment variable must be set; it is used as the password "
        "for the 'user' login of the Gradio app"
    )

interface.launch(auth=('user', read_key))