# -*- coding: utf-8 -*-
"""SimpleChatBot_OpenSourceModel_WithUI.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1q7EXhcR6gncrcwySFbN7u9fOIwTc4LtD

## ***Note:*** You will NOT be charged for this exercise. Everything is open source!

### This notebook presents how to make a simple conversational chatbot using an open-source language model that we will download from the Hugging Face Hub
"""

"""### Install the Python packages. They are necessary to make the program work"""

# The package list below is inferred from the imports in this notebook; pin versions as needed.
!pip install -q transformers accelerate bitsandbytes langchain gradio

"""### Import the necessary libraries"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

"""### Select and download the model from Hugging Face

#### The Hugging Face Hub contains a lot of pre-trained AI models related to computer vision, NLP, etc. For our task, we need a text-generation model. Follow the steps below to choose and download one:

1. Go to this link -> https://huggingface.co/models?pipeline_tag=text-generation&sort=trending
2. For this example, we will be using Mistral 7B Instruct v0.2 [https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2]
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the model in 4-bit so it fits on a single Colab GPU.
# device_map='auto' lets accelerate place the weights, so no manual .to("cuda") is needed.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    quantization_config=quantization_config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

"""### Set up a text-generation pipeline"""

from transformers import pipeline

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,  # required for temperature/top_k/top_p to take effect
    temperature=0.3,
    top_k=50,
    top_p=0.95,
    max_new_tokens=3072,
    repetition_penalty=1.2,
)

"""### Create an llm object"""

llm = HuggingFacePipeline(pipeline=pipe)

"""### Create a simple prompt template using the LangChain framework"""

template = """[INST] You are a question-answering bot.
You always respond with a funny twist, and keep your answers short.
Now answer this question: {question}.
To keep you more stateful, you also get help from the previous chat history: {chat_history}[/INST]
"""

prompt = PromptTemplate(template=template, input_variables=["question", "chat_history"])

"""### Create an llm chain"""

llm_chain = LLMChain(prompt=prompt, llm=llm)

"""### Try invoking the LLM with a simple chain"""

def ask_me_chat_completions(query, chat_history, llm_chain):
    response = llm_chain.run({"question": query, "chat_history": chat_history})
    return response
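"""#### A quick smoke test (optional)

A minimal example call, assuming the model loaded successfully; the question string is just an illustration, and the empty list stands in for "no prior chat history".
"""

print(ask_me_chat_completions("What is the capital of France?", [], llm_chain))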
"""### Create a UI for a more interactive conversation!

#### **Exercise**

1. Can you make the UI look better?
2. Can you include more customizations (for example, setting up something like the OpenAI playground)? Check out this link to see how it looks -> https://platform.openai.com/playground?mode=chat
3. Host your application by creating a Space on the Hugging Face Hub
"""

import gradio as gr

# Initialize chat history
chat_history = []

def chat_interface(query):
    global chat_history
    if query:  # Ensure the query is not empty
        response = ask_me_chat_completions(query, chat_history, llm_chain)
        formatted_response = f"Q: {query}\nA: {response}"  # Format the response
        chat_history.append(formatted_response)  # Append the formatted exchange to the history
        return formatted_response  # Return only the latest Q&A for display
    return ""  # Return an empty string if the query is empty

# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        query_input = gr.Textbox(
            label="Ask your questions here",
            placeholder="Type your question and press submit...",
        )
        submit_button = gr.Button("Submit")
        response_output = gr.Textbox(
            label="Response",
            interactive=False,
            lines=6,
            value="",
            placeholder="Your answer will appear here...",
        )
        submit_button.click(chat_interface, inputs=query_input, outputs=response_output)

# Launch the Gradio app
demo.launch()
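"""#### Optional: bound the chat history

`chat_history` above grows without limit, so long sessions can eventually overflow the model's context window. Below is a minimal sketch of one way to cap it; the window of 5 exchanges is an arbitrary assumption, and `chat_interface_trimmed` is a hypothetical drop-in replacement for `chat_interface` in the click handler above.
"""

MAX_TURNS = 5  # assumed window size; tune for your model's context length

def chat_interface_trimmed(query):
    global chat_history
    if query:
        # Feed only the most recent exchanges into the prompt.
        recent_history = chat_history[-MAX_TURNS:]
        response = ask_me_chat_completions(query, recent_history, llm_chain)
        chat_history.append(f"Q: {query}\nA: {response}")
        return chat_history[-1]
    return ""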