# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir  >> upgraded
#================================================================================
# Developer  : Soumen Dey
# Assignment : Gen-ai/CEP-1
# Env        : Windows with CPU [No GPU]
# License    : GPL
# - Steps:
#   1: Download the required model from LMStudio and save it to your local disk
#   2: Install the required Python libs for the code
# Note: Tech stack: llama3 and gradio (assistant: local llama and LMStudio)
#----------------------------------------------------------------------------------
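# The imports below roughly correspond to these packages (package names inferred
# from the imports used in this script; pin versions to match your environment):
#   pip install gradio llama-cpp-python langchain langchain-community \
#       langchain-text-splitters sentence-transformers faiss-cpu "unstructured[pdf]"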
import gradio as gr
import time
from llama_cpp import Llama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document                        # not used below
from langchain_community.embeddings import OllamaEmbeddings  # not used below
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma          # not used below
# 1. Load your LLaMA 3 model
#
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"

llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,
    n_threads=8,
)  # streaming is requested per call (stream=True) in chat_fn below
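# Optional sanity check (illustrative only): run one tiny non-streaming completion
# to confirm the GGUF file loaded correctly before building the rest of the app.
# print(llm("Q: What is 2 + 2? A:", max_tokens=8)["choices"][0]["text"])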
#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader  # not used below

data = []
# Load the PDF
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("Please provide a path to a PDF file.")
#---------- End
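# Note: loader.load() returns a list of LangChain Document objects (page_content
# plus metadata), which is the input format the splitter below expects.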
#-------------- GET THE CHUNKS ------------------------
# Split the document into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
#------------------------------------------------------
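# Optional: inspect the split before embedding (purely illustrative).
# print(f"{len(chunks)} chunks; first 200 chars of chunk 0:\n{chunks[0].page_content[:200]}")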
# 2. Build the vector DB and save it to disk (see the reload note below)
localIndex = "faiss_index_v1"

# embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Load a local embedding model (swap in a larger model if your hardware allows)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # heavier alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
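# Optional: on later runs the saved index can be reloaded instead of re-embedding
# the PDF. The allow_dangerous_deserialization flag is required by recent
# langchain_community releases for pickle-backed FAISS indexes; older releases
# do not accept it, so adjust to your installed version.
# vector_db = FAISS.load_local(localIndex, embedding_model,
#                              allow_dangerous_deserialization=True)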
# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Get context from the vector store (top-2 most similar chunks)
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])

    # Truncate to 1000 characters (adjust as needed to stay within n_ctx)
    context = context[:1000]

    # Build the prompt: instruction, retrieved context, then the chat history
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
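# Optional console smoke test (no UI). chat_fn is a generator, so drain it;
# the question below is only an example.
# for partial in chat_fn("What does the policy say about employee training?", []):
#     pass
# print(partial)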
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤖, \nsearch anything for the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}
desc = f"File in process:{LOCAL_FILE}, Model Name :{MODEL_NAME}, powered by llama-3/hf: (Rimbik) π€" | |
with gr.Blocks() as demo: | |
# highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors) | |
gr.ChatInterface( | |
fn=chat_fn, | |
title="LLaMA 3 πͺ + Local Vector DB Chat: π€", | |
description = desc | |
,theme="soft", | |
) | |
if __name__ == "__main__": | |
demo.launch(share=False) # Set True for live public url | |
#------------------------- EOF --------------------------------------------------- Date: May-4/2025