# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir   # upgraded

#================================================================================
# Developer: Soumen Dey
# Assignment : Gen-AI/CEP-1
# Env  : Windows with CPU [No GPU]
# License : GPL
# - Steps:
#     1: Download the required model from LMStudio and save it to your local disk
#     2: Install the required Python libraries for the code (see the install sketch below)
#
#   Note: Tech stack: LLaMA 3 and Gradio (assistant: local LLaMA and LMStudio)
#----------------------------------------------------------------------------------
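# A minimal install sketch for the imports used below (package names are my
# assumption from the import statements; pin versions to match your setup):
#   pip install gradio llama-cpp-python langchain-community \
#       langchain-text-splitters sentence-transformers faiss-cpu "unstructured[pdf]"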

import gradio as gr
import time
from llama_cpp import Llama

# Community packages provide the vector store, embeddings, and text splitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load your LLaMA 3 model
# 
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"

LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"

# Note: streaming is requested per call (see chat_fn), not at construction time
llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,     # context window (prompt + completion)
    n_threads=8,    # CPU threads
)
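
# Optional smoke test (my addition; uncomment to confirm the GGUF loads and generates):
# out = llm("Q: What is 2 + 2?\nA:", max_tokens=16)
# print(out["choices"][0]["text"])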

#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader

data = []

# Load the PDF into LangChain Document objects
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("PDF loaded.")
else:
    print("Please provide a path to a PDF file.")
#---------- End


#-------------- GET THE CHUNKS -------------------------
# Split the loaded documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,        # large chunks; retrieved context is truncated again in chat_fn
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)

chunks = text_splitter.split_documents(data)
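
# Optional visibility into the split (my addition):
# print(f"{len(chunks)} chunk(s) created from {len(data)} source document(s)")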

#------------------------------------------------------

# 2. Build the vector DB (or load it from disk)
localIndex = "faiss_index_v1"

# Local embedding model, forced onto CPU
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # larger local alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
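
# On later runs the saved index can be reloaded instead of re-embedding the PDF
# (sketch, assuming the index directory from a previous run is present):
# vector_db = FAISS.load_local(
#     localIndex, embedding_model, allow_dangerous_deserialization=True
# )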


# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # history arrives as (user, assistant) pairs; the prompt-building loop below relies on that
    # Get context from the vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])
    
    # Truncate to 1000 characters (or adjust as needed)
    context = context[:1000]

    # Create prompt
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
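
# Example of driving chat_fn directly, without the Gradio UI (my sketch; the
# question text is illustrative only):
# for partial in chat_fn("Summarise the HR policy in one sentence.", []):
#     answer = partial
# print(answer)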

# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#       fn=chat_fn, 
#       title="LLaMA 3 + Local Vector DB Chat",
#       description="powered by llama3/hf: (Rimbik) 🤗, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN  GENERATIVE AI AND MACHINE LEARNING'"
#       ,theme="soft",
# ).launch(share=True)


# Labels and colors for the (currently commented-out) gr.HighlightedText header
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}

desc = f"File in process: {LOCAL_FILE}, Model Name: {MODEL_NAME}, powered by llama-3/hf: (Rimbik) 🤗"

with gr.Blocks(theme="soft") as demo:  # theme belongs to the outer Blocks, not the nested ChatInterface
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 🐪 + Local Vector DB Chat: 🤖",
        description=desc,
    )

if __name__ == "__main__":
    demo.launch(share=False) # Set True for live public url

#------------------------- EOF --------------------------------------------------- Date: May-4/2025