from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import torch
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain.chains import RetrievalQA
from langchain import PromptTemplate
from textwrap import fill
import gradio as gr
import time

# Loading the PDF files
loader = PyPDFDirectoryLoader("data")
docs = loader.load()

# Splitting the text into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(docs)

# Creating embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)
query_result = embeddings.embed_query(texts[0].page_content)

# Saving the embeddings in the Chroma database
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
results = db.similarity_search("Transformer models", k=2)

# Loading the transformer model
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
)

# Create a text generation configuration for the specified model
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)

# Limit the generated output to at most 1024 new tokens.
generation_config.max_new_tokens = 1024

# Set the sampling temperature. Values close to 0 (e.g., 0.0001) make the output
# nearly deterministic; higher values make it more random.
generation_config.temperature = 0.0001

# Set the top-p (nucleus) sampling value: sample only from the smallest set of
# tokens whose cumulative probability reaches 95%.
generation_config.top_p = 0.95

# Enable sampling, so the model picks tokens according to their probabilities
# instead of always taking the single most likely one.
generation_config.do_sample = True

# Set the repetition penalty. A value of 1.15 discourages the model from
# repeating the same words or phrases too often.
generation_config.repetition_penalty = 1.15

# Create a text generation pipeline from the model, tokenizer, and generation configuration
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)

# Wrap the text generation pipeline as a LangChain LLM, pinning the temperature to 0
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
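# Optional sanity checks (an illustrative sketch, not part of the original
# pipeline): confirm the embedding dimension and eyeball the retrieved chunks
# before wiring up the chain. `query_result` and `results` were computed above.
print(f"Embedding dimension: {len(query_result)}")  # thenlper/gte-large produces 1024-dim vectors
for doc in results:
    print(fill(doc.page_content[:300], width=80))
    print("-" * 80)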
template_3 = """
[INST] <<SYS>>
Act as a student counselor at Aalborg University Business School and answer the question at the end.
The answer should be about the master programs found in the provided documents ONLY.
The answer should be MAXIMUM 40 words.
Use the examples in {context} to generate the answer, without directly mentioning any of it.
<</SYS>>

{context}

N-shot prompting:
N-1
Q: How do I find out what master's degree I want to study?
A: To determine which master's degree you would like to study, you should consider which business-related modules are within your interest. Which modules from the bachelor's degree did you find interesting?
N-2
Q: I liked the modules [input] in the bachelor, what masters could be relevant for me?
A: Based on your interests in [input], it may be beneficial to consider studying [output]. The curriculum for this program includes several modules that align with your interests.

ReAct prompting:
Q: "how do i find out what masters degree i want to study"
A: "To determine which master's degree you would like to study, you should consider which business-related modules are within your interest. Which modules from the bachelor's degree did you find interesting?"
Q: "I liked macro economics and organisation"
A: "Based on your interests in macroeconomics and organizations, it may be beneficial to consider studying the Master of Science (MSc) in Economics and Business Administration (Finance) program at Aalborg University Business School. The curriculum for this program includes several modules that align with your interests, such as 'Network Theory and Analysis' and 'Data-Driven Business Modeling and Strategy'. These modules cover topics related to macroeconomics and organizational behavior, providing you with valuable insights and skills that could help you achieve your career goals. Additionally, the program offers an application-focused approach, allowing you to apply your knowledge to real-world problems and develop practical solutions."

Feedback: The advice should focus on the unique modules in the 1st and 2nd semester of each master, as the 3rd-semester modules are elective options shared by all masters.

{question} [/INST]
"""

prompt_3 = PromptTemplate(template=template_3, input_variables=["context", "question"])

# Build a retrieval QA chain that stuffs the top-2 retrieved chunks into the prompt
qa_chain_3 = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_3},
)


# Single-turn helper: run the chain on a question and return the stripped answer
def reply_bot(txt):
    bot_result = qa_chain_3(txt)
    return bot_result["result"].strip()


bot_name = "Master Supervisor"

with gr.Blocks() as demo:
    gr.Markdown("### Master's Degree Program Advisor")
    gr.Markdown(
        "I can help you find the master's degree program that's right for you. "
        "Ask me any question related to choosing a master's program."
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    # Chat handler (shadows the single-turn helper above): append the
    # (message, answer) pair to the history and clear the textbox.
    def reply_bot(message, chat_history):
        bot_result = qa_chain_3(message)
        chat_history.append((message, bot_result["result"].strip()))
        time.sleep(2)
        return "", chat_history

    msg.submit(reply_bot, [msg, chatbot], [msg, chatbot])

demo.queue().launch(share=True)
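# Quick smoke test (a hypothetical usage sketch, assuming the chain built above;
# the question string is made up). Run it before launching the UI, since
# `launch()` blocks: call the chain directly and inspect which source
# documents the retriever pulled in.
# result = qa_chain_3("Which master's programs focus on finance?")
# print(result["result"].strip())
# for doc in result["source_documents"]:
#     print(doc.metadata.get("source"))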