# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
#================================================================================
# Developer  : Soumen Dey
# Assignment : Gen-ai/CEP-1
# Env        : Windows with CPU [No GPU]
# License    : GPL
# Steps:
#   1: Download the required model from LMStudio and save it to your local disk
#   2: Install the required Python libs for the code
# Note: Tech stack: llama3 and gradio (assistant: local llama and LMStudio)
#----------------------------------------------------------------------------------

import time

import gradio as gr
from llama_cpp import Llama
from langchain_community.document_loaders import UnstructuredPDFLoader
# from langchain_community.document_loaders import OnlinePDFLoader  # unused here
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import OllamaEmbeddings  # unused here
from langchain_community.vectorstores import FAISS
# from langchain_community.vectorstores import Chroma  # unused here
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load your LLaMA 3 model
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"

llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,   # context window size in tokens
    n_threads=8,  # CPU threads (no GPU in this setup)
)
# Streaming is requested per call (stream=True in llm(...)); it is not a
# constructor argument.

#-------------- Load the data -------------------------
# Load the PDF
data = ""
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("upload a pdf file")
#---------- End

#-------------- GET THE CHUNKS ------------------------
# Split and chunk
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
#------------------------------------------------------

# 2. Build the vector DB (or load it from disk)
localIndex = "faiss_index_v1"

# Load local embedding model (use a stronger embedding model if possible)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # larger, fully local alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)

# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Get context from the vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])

    # Truncate to 1000 characters (or adjust as needed)
    context = context[:1000]

    # Create the prompt
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
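# Optional (a sketch, not part of the original flow): step 2 above says
# "or load it from disk". On later runs the saved index could be reloaded
# instead of re-embedding the PDF. This assumes the "faiss_index_v1" folder
# written by save_local() exists; allow_dangerous_deserialization=True is
# required by recent langchain-community releases for pickle-backed indexes.
#
# import os
# if os.path.isdir(localIndex):
#     vector_db = FAISS.load_local(
#         localIndex,
#         embedding_model,
#         allow_dangerous_deserialization=True,
#     )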
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤗, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)

# Labels/colors for the (currently commented-out) HighlightedText header below
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}

desc = f"File in process: {LOCAL_FILE}, Model Name: {MODEL_NAME}, powered by llama-3/hf: (Rimbik) 🤗"

with gr.Blocks(theme="soft") as demo:  # theme belongs on Blocks when nesting
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 🐪 + Local Vector DB Chat: 🤖",
        description=desc,
    )

if __name__ == "__main__":
    demo.launch(share=False)  # set share=True for a live public URL

#------------------------- EOF ---------------------------------------------------
# Date: May-4/2025
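#------------------------- Appendix ----------------------------------------------
# Header step 2 ("Install required python libs"): a plausible CPU-only package
# set inferred from the imports above (names are assumptions, versions unpinned):
#
#   pip install llama-cpp-python gradio sentence-transformers faiss-cpu
#   pip install langchain-community langchain-text-splitters
#   pip install "unstructured[pdf]"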