# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir  # upgraded
#================================================================================
# Developer : Soumen Dey
# Assignment: Gen-ai/CEP-1
# Env       : Windows with CPU [No GPU]
# License   : GPL
# Steps:
#   1: Download the required model from LM Studio and save it to your local disk
#   2: Install the required Python libs for the code (see the list below)
# Note: Tech stack: llama3 and gradio (assistant: local llama and LM Studio)
#----------------------------------------------------------------------------------
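# Likely Python dependencies (standard PyPI package names; this list is
# inferred from the imports below, so pin versions to suit your environment):
#   pip install gradio llama-cpp-python langchain-community \
#       langchain-text-splitters sentence-transformers faiss-cpu "unstructured[pdf]"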
import gradio as gr
import time
from llama_cpp import Llama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# (Unused imports removed: Document, OllamaEmbeddings, Chroma; deprecated
# langchain.* import paths replaced with their langchain_community equivalents.)
# 1. Load your LLaMA 3 model
#
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"
llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,      # context window in tokens; the whole prompt must fit here
    n_threads=8,     # CPU threads; tune to your machine
)
# Note: streaming is requested per call (stream=True in llm(...)) further down;
# it is not a constructor parameter, so the constructor-level flag was dropped.
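# Optional sanity check (a sketch, not part of the original flow):
# llama-cpp-python exposes Llama.tokenize(), which takes UTF-8 bytes, so you
# can confirm a prompt fits within n_ctx before calling the model:
# n_tokens = len(llm.tokenize("your prompt here".encode("utf-8")))
# assert n_tokens < 2048, "prompt too long for the configured context window"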
#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader

data = []
# Load the PDF (a lighter-weight loader sketch follows this block)
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("PDF loaded.")
else:
    print("Please provide a PDF file path.")
#---------- End
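# Lighter-weight loader sketch, in case unstructured's PDF extras are not
# installed (assumes `pip install pypdf`; PyPDFLoader is a drop-in here):
# from langchain_community.document_loaders import PyPDFLoader
# data = PyPDFLoader(local_doc_path).load()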
#-------------- GET THE CHUNKS -------------------------
# Split and chunk. Note: chunk_size is measured in characters, and a single
# 7500-character chunk can exceed the model's 2048-token context window, so
# the retrieved context is truncated again inside chat_fn below.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in order
)
chunks = text_splitter.split_documents(data)
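# Quick sanity check on the split (safe to keep or drop):
print(f"Split into {len(chunks)} chunk(s).")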
#------------------------------------------------------
# 2. Build the vector DB (or load it from disk; see the sketch below)
localIndex = "faiss_index_v1"
# Load a local embedding model (swap in a larger model if accuracy matters):
# embedding_model = HuggingFaceEmbeddings(model_name="hkunlp/instructor-large")
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
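# Load-path sketch for the "(or load it from disk)" case mentioned above; as
# written, this script always rebuilds the index. The sketch assumes
# `import os`, that the index was saved with the same embedding model, and a
# recent langchain_community version (which requires the
# allow_dangerous_deserialization flag for pickle-backed FAISS indexes):
# if os.path.exists(localIndex):
#     vector_db = FAISS.load_local(
#         localIndex, embedding_model, allow_dangerous_deserialization=True
#     )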
# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Retrieve the two closest chunks from the vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])
    # Truncate to 1000 characters so the prompt stays within n_ctx (adjust as needed)
    context = context[:1000]
    # Build a plain-text prompt: system instruction, retrieved context, then
    # the (user, bot) turns that Gradio passes in as history
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"
    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
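# Alternative generation sketch, a possible drop-in for the llm(prompt, ...)
# call inside chat_fn: llama-cpp-python can apply the GGUF's own chat template
# via create_chat_completion(), which often behaves better with instruct-tuned
# models than a hand-built "User:/Assistant:" prompt:
# stream = llm.create_chat_completion(
#     messages=[
#         {"role": "system", "content": "Use the context to answer questions."},
#         {"role": "user", "content": f"{context}\n\n{message}"},
#     ],
#     max_tokens=512,
#     stream=True,
# )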
# 4. Launch the Gradio chat UI
# Earlier standalone launcher, kept for reference (superseded by the Blocks UI below):
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤗,\nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)
# Labels/colors for an optional HighlightedText header (currently unused; see
# the commented-out line inside the Blocks below)
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}
desc = f"File in process: {LOCAL_FILE}, Model Name: {MODEL_NAME}, powered by llama-3/hf: (Rimbik) 🤗"
with gr.Blocks() as demo:
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 💪 + Local Vector DB Chat: 🤗",
        description=desc,
        theme="soft",  # note: a theme set on a nested ChatInterface may be overridden by the enclosing Blocks
    )
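# Note: on Gradio 3.x, streaming generator callbacks required enabling the
# queue before launching (Gradio 4 queues by default):
# demo.queue()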
if __name__ == "__main__":
    demo.launch(share=False)  # set share=True for a live public URL
#------------------------- EOF --------------------------------------------------- Date: May-4/2025