# pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir  # upgraded
#================================================================================
# Developer  : Soumen Dey
# Assignment : Gen-AI/CEP-1
# Env        : Windows with CPU [no GPU]
# License    : GPL
# Steps:
#   1: Download the required model from LM Studio and save it to your local disk
#   2: Install the required Python libraries (a plausible set is sketched below)
# Note: Tech stack: Llama 3 and Gradio (assistant: local Llama and LM Studio)
#----------------------------------------------------------------------------------
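# Dependency sketch for step 2, inferred from the imports below (an assumption,
# not an official requirements list):
#   pip install gradio llama-cpp-python langchain langchain-community \
#       langchain-text-splitters faiss-cpu sentence-transformers "unstructured[pdf]"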
import gradio as gr
import time
from llama_cpp import Llama
from langchain_community.vectorstores import FAISS  # langchain.vectorstores is deprecated
from langchain_community.embeddings import HuggingFaceEmbeddings  # langchain.embeddings is deprecated
from langchain_text_splitters import RecursiveCharacterTextSplitter
# 1. Load your LLaMA 3 model
#
local_doc_path = "E:/OLLAAMA/code/pdfChat/pdf/the_nestle_hr_policy_pdf_2012.pdf"
model_path_gguf = "C:/Users/soume/.lmstudio/models/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct-Q4_K_M-GGUF/patronus-lynx-8b-instruct-q4_k_m.gguf"
LOCAL_FILE = "the_nestle_hr_policy_pdf_2012.pdf"
MODEL_NAME = "patronus-lynx-8b-instruct-q4_k_m.gguf"
llm = Llama(
    model_path=model_path_gguf,
    n_ctx=2048,
    n_threads=8,
)  # streaming is requested per call in chat_fn, not via the constructor
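# Optional sanity check (a minimal sketch; assumes the GGUF path above exists):
# one tiny non-streaming completion confirms the model loads before wiring up RAG.
# out = llm("Q: What is 2+2?\nA:", max_tokens=8)
# print(out["choices"][0]["text"])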
#-------------- Load the data -------------------------
# Inserting PDF
from langchain_community.document_loaders import UnstructuredPDFLoader
data = []
# Load the PDF
if local_doc_path:
    loader = UnstructuredPDFLoader(file_path=local_doc_path)
    data = loader.load()
    print("loaded.")
else:
    print("Upload a PDF file first.")
#---------- End
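# UnstructuredPDFLoader needs the `unstructured[pdf]` extra. A lighter alternative
# (an assumption, not what this app was written against) is PyPDFLoader:
# from langchain_community.document_loaders import PyPDFLoader
# data = PyPDFLoader(local_doc_path).load()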
#-------------- GET THE CHUNKS -------------------------
# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=7500,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " ", ""],  # tried in this order
)
chunks = text_splitter.split_documents(data)
#------------------------------------------------------
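# Optional inspection (a sketch): the chunk count and a short preview help verify
# that the separators behaved sensibly on this PDF.
# print(f"{len(chunks)} chunks; first chunk starts: {chunks[0].page_content[:120]!r}")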
# 2. Build the vector DB (or load it from disk)
localIndex = "faiss_index_v1"
# embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Load local embedding model (use a real embedding model if possible)
embedding_model = HuggingFaceEmbeddings(
    # model_name="hkunlp/instructor-large",  # larger, fully local alternative
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector_db = FAISS.from_documents(chunks, embedding_model)
vector_db.save_local(localIndex)
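# On later runs the saved index can be reloaded instead of re-embedding (a sketch;
# recent langchain-community versions require allow_dangerous_deserialization):
# vector_db = FAISS.load_local(localIndex, embedding_model,
#                              allow_dangerous_deserialization=True)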
# 3. Chat function with RAG + streaming
def chat_fn(message, history):
    # Retrieve the top-k most similar chunks from the vector store
    docs_with_scores = vector_db.similarity_search_with_score(message, k=2)
    context = "\n".join([doc.page_content for doc, score in docs_with_scores])
    # Truncate to 1000 characters (or adjust as needed) to fit the context window
    context = context[:1000]

    # Build the prompt: system instruction, retrieved context, then chat history
    prompt = "You are a helpful assistant. Use the context to answer questions.\n"
    prompt += f"Context:\n{context}\n\n"
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate with streaming (typing effect)
    response = ""
    for chunk in llm(prompt, max_tokens=512, stop=["User:"], stream=True):
        token = chunk["choices"][0]["text"]
        response += token
        yield response
        time.sleep(0.02)
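# chat_fn is a generator, so it can be smoke-tested without the UI (assuming the
# tuple-style history that gr.ChatInterface passes by default):
# last = ""
# for last in chat_fn("What does the HR policy say about training?", []):
#     pass
# print(last)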
# 4. Launch the Gradio chat UI
# gr.ChatInterface(
#     fn=chat_fn,
#     title="LLaMA 3 + Local Vector DB Chat",
#     description="powered by llama3/hf: (Rimbik) 🤗\nsearch anything in the pdf 'PROFESSIONAL CERTIFICATE COURSE IN GENERATIVE AI AND MACHINE LEARNING'",
#     theme="soft",
# ).launch(share=True)
keys = [
    ("File in process", "category1"),
    ("Model Name", "category1"),
]
colors = {
    "category1": "red",
    "category2": "orange",
    "category3": "yellow",
    "category4": "green",
    "category5": "blue",
    "category6": "indigo",
}
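# keys/colors are only consumed by the commented-out HighlightedText header below;
# a minimal sketch of that usage (hypothetical value, using the color_map kwarg):
# gr.HighlightedText(value=[(f"File in process: {LOCAL_FILE}", "category1")],
#                    color_map=colors)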
desc = f"File in process: {LOCAL_FILE}, Model Name: {MODEL_NAME}, powered by llama-3/hf: (Rimbik) 🤗"
with gr.Blocks() as demo:
    # highlighted_text = gr.HighlightedText(value=header, labels=keys, colors=colors)
    gr.ChatInterface(
        fn=chat_fn,
        title="LLaMA 3 🪐 + Local Vector DB Chat: 🤖",
        description=desc,
        theme="soft",
    )
if __name__ == "__main__":
    demo.launch(share=False)  # set share=True for a live public URL
#------------------------- EOF --------------------------------------------------- Date: May-4/2025