# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
#================================================================================
# Developer  : Soumen Dey
# Assignment : Gen-ai/CEP-1
# Env        : Windows with CPU [No GPU]
# License    : GPL
# Steps:
#   1: Download the required model from LMStudio and save it to your local disk
#   2: Install the required Python libs for the code
# Note: Tech stack: llama3 and gradio (assistant: local llama and LMStudio)
#----------------------------------------------------------------------------------

import time

import gradio as gr
from llama_cpp import Llama
from langchain_community.document_loaders import UnstructuredPDFLoader
# from langchain_community.document_loaders import OnlinePDFLoader  # unused here
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import OllamaEmbeddings  # unused here
from langchain_community.vectorstores import FAISS
# from langchain_community.vectorstores import Chroma  # unused here
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load your LLaMA 3 model
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"

llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,   # context window size in tokens
    n_threads=8,  # CPU threads (no GPU in this setup)
)
# Streaming is requested per call (stream=True in llm(...)); it is not a
# constructor argument.

#-------------- Load the data -------------------------
# Load the PDF
data = ""
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("upload a pdf file")
#---------- End

#-------------- GET THE CHUNKS ------------------------
# Split and chunk
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
#------------------------------------------------------

# 2. Build the vector DB (or load it from disk)
localIndex = "faiss_index_v1"

# Load local embedding model (use a stronger embedding model if possible)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # larger, fully local alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)

# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Get context from the vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])

    # Truncate to 1000 characters (or adjust as needed)
    context = context[:1000]

    # Create the prompt
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
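# Optional (a sketch, not part of the original flow): step 2 above says
# "or load it from disk". On later runs the saved index could be reloaded
# instead of re-embedding the PDF. This assumes the "faiss_index_v1" folder
# written by save_local() exists; allow_dangerous_deserialization=True is
# required by recent langchain-community releases for pickle-backed indexes.
#
# import os
# if os.path.isdir(localIndex):
#     vector_db = FAISS.load_local(
#         localIndex,
#         embedding_model,
#         allow_dangerous_deserialization=True,
#     )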
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤗, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)

# Labels/colors for the (currently commented-out) HighlightedText header below
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}

desc = f"File in process: {LOCAL_FILE}, Model Name: {MODEL_NAME}, powered by llama-3/hf: (Rimbik) 🤗"

with gr.Blocks(theme="soft") as demo:  # theme belongs on Blocks when nesting
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 🐪 + Local Vector DB Chat: 🤖",
        description=desc,
    )

if __name__ == "__main__":
    demo.launch(share=False)  # set share=True for a live public URL

#------------------------- EOF ---------------------------------------------------
# Date: May-4/2025
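#------------------------- Appendix ----------------------------------------------
# Header step 2 ("Install required python libs"): a plausible CPU-only package
# set inferred from the imports above (names are assumptions, versions unpinned):
#
#   pip install llama-cpp-python gradio sentence-transformers faiss-cpu
#   pip install langchain-community langchain-text-splitters
#   pip install "unstructured[pdf]"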